Skip to main content

Statistics Module 📊

API reference for the memory::statistics module, which provides statistical operations for single-cell RNA-seq data analysis.

Core Statistics Functions

compute_qc_variables

Computes quality control (QC) statistics for an AnnData object's expression matrix.

pub fn compute_qc_variables<I, T>(
adata: &IMAnnData
) -> anyhow::Result<StatisticsContainer<I, T>>
where
I: PrimInt + Unsigned + Zero + AddAssign,
T: Float + NumCast + AddAssign + Sum + From<I>

Type Parameters

  • I: Integer type for counting non-zero elements (typically u32)
  • T: Floating-point type for numerical computations (typically f64)

Parameters

  • adata: Reference to an IMAnnData object containing the expression matrix

Returns

  • anyhow::Result<StatisticsContainer<I, T>>: Container with calculated statistics

Example

let qc_stats = compute_qc_variables::<u32, f64>(&adata)?;

// Access the computed statistics
let genes_per_cell = &qc_stats.num_per_cell;
let cells_per_gene = &qc_stats.num_per_gene;
let total_expr_per_gene = &qc_stats.expr_per_gene;

StatisticsContainer

A structure that holds various statistical metrics computed from expression data.

pub struct StatisticsContainer<I, T>
where
I: PrimInt + Unsigned + Zero + AddAssign,
T: Float + NumCast + AddAssign + Sum + From<I>
{
pub num_per_gene: Vec<I>, // Number of cells expressing each gene
pub expr_per_gene: Vec<T>, // Total expression per gene
pub num_per_cell: Vec<I>, // Number of genes expressed per cell
pub expr_per_cell: Vec<T>, // Total expression per cell
pub variance_per_gene: Vec<T>, // Expression variance per gene
pub variance_per_cell: Vec<T>, // Expression variance per cell
pub std_dev_per_gene: Vec<T>, // Expression standard deviation per gene
pub std_dev_per_cell: Vec<T>, // Expression standard deviation per cell
}

Trait Implementations

The statistics module implements several traits that provide statistical operations for different data types. These traits are implemented for IMArrayElement (in-memory array elements).

ComputeNonZero

Computes the number of non-zero elements along a specified dimension.

nonzero_whole

fn nonzero_whole<T>(
&self,
direction: &Direction
) -> anyhow::Result<Vec<T>>
where
T: PrimInt + Unsigned + Zero + AddAssign
Parameters
  • direction: Direction for computation (Direction::ROW or Direction::COLUMN)
Returns
  • anyhow::Result<Vec<T>>: Vector of counts
Example
// Count non-zero elements per cell
let genes_per_cell = matrix.nonzero_whole::<u32>(&Direction::ROW)?;

// Count non-zero elements per gene
let cells_per_gene = matrix.nonzero_whole::<u32>(&Direction::COLUMN)?;

nonzero_chunk

fn nonzero_chunk<T>(
&self,
direction: &Direction,
reference: &mut [T]
) -> anyhow::Result<()>
where
T: PrimInt + Unsigned + Zero + AddAssign
Parameters
  • direction: Direction for computation (Direction::ROW or Direction::COLUMN)
  • reference: Pre-allocated vector to store results
Returns
  • anyhow::Result<()>: Success or error
Example
let mut counts = vec![0u32; adata.n_obs()];
matrix.nonzero_chunk::<u32>(&Direction::ROW, &mut counts)?;

ComputeSum

Computes the sum of elements along a specified dimension.

sum_whole

fn sum_whole<T>(
&self,
direction: &Direction
) -> anyhow::Result<Vec<T>>
where
T: Float + NumCast + AddAssign + Sum
Parameters
  • direction: Direction for computation (Direction::ROW or Direction::COLUMN)
Returns
  • anyhow::Result<Vec<T>>: Vector of sums
Example
// Calculate total expression per cell
let expr_per_cell = matrix.sum_whole::<f64>(&Direction::ROW)?;

// Calculate total expression per gene
let expr_per_gene = matrix.sum_whole::<f64>(&Direction::COLUMN)?;

sum_chunk

fn sum_chunk<T>(
&self,
direction: &Direction,
reference: &mut [T]
) -> anyhow::Result<()>
where
T: Float + NumCast + AddAssign + Sum
Parameters
  • direction: Direction for computation (Direction::ROW or Direction::COLUMN)
  • reference: Pre-allocated vector to store results
Returns
  • anyhow::Result<()>: Success or error
Example
let mut totals = vec![0.0f64; adata.n_obs()];
matrix.sum_chunk::<f64>(&Direction::ROW, &mut totals)?;

ComputeVariance

Computes the variance of elements along a specified dimension.

variance_whole

fn variance_whole<I, T>(
&self,
direction: &Direction
) -> anyhow::Result<Vec<T>>
where
I: PrimInt + Unsigned + Zero + AddAssign + Into<T>,
T: Float + NumCast + AddAssign + Sum
Type Parameters
  • I: Integer type for counting (typically u32)
  • T: Floating-point type for computations (typically f64)
Parameters
  • direction: Direction for computation (Direction::ROW or Direction::COLUMN)
Returns
  • anyhow::Result<Vec<T>>: Vector of variances
Example
// Calculate expression variance per gene
let var_per_gene = matrix.variance_whole::<u32, f64>(&Direction::COLUMN)?;

variance_chunk

fn variance_chunk<I, T>(
&self,
direction: &Direction,
reference: &mut [T]
) -> anyhow::Result<()>
where
I: PrimInt + Unsigned + Zero + AddAssign + Into<T>,
T: Float + NumCast + AddAssign + Sum
Type Parameters
  • I: Integer type for counting (typically u32)
  • T: Floating-point type for computations (typically f64)
Parameters
  • direction: Direction for computation (Direction::ROW or Direction::COLUMN)
  • reference: Pre-allocated vector to store results
Returns
  • anyhow::Result<()>: Success or error
Example
let mut var_values = vec![0.0f64; adata.n_vars()];
matrix.variance_chunk::<u32, f64>(&Direction::COLUMN, &mut var_values)?;

ComputeMinMax

Computes the minimum and maximum values along a specified dimension.

min_max_whole

fn min_max_whole<T>(
&self,
direction: &Direction
) -> anyhow::Result<(Vec<T>, Vec<T>)>
where
T: NumCast + Copy + PartialOrd + NumericOps
Parameters
  • direction: Direction for computation (Direction::ROW or Direction::COLUMN)
Returns
  • anyhow::Result<(Vec<T>, Vec<T>)>: Pair of vectors (min values, max values)
Example
// Get min and max expression values per gene
let (min_per_gene, max_per_gene) = matrix.min_max_whole::<f64>(&Direction::COLUMN)?;

min_max_chunk

fn min_max_chunk<T>(
&self,
direction: &Direction,
reference: (&mut Vec<T>, &mut Vec<T>)
) -> anyhow::Result<()>
where
T: NumCast + Copy + PartialOrd + NumericOps
Parameters
  • direction: Direction for computation (Direction::ROW or Direction::COLUMN)
  • reference: Pair of pre-allocated vectors to store results (min values, max values)
Returns
  • anyhow::Result<()>: Success or error
Example
let mut min_values = vec![f64::INFINITY; adata.n_vars()];
let mut max_values = vec![f64::NEG_INFINITY; adata.n_vars()];
matrix.min_max_chunk::<f64>(&Direction::COLUMN, (&mut min_values, &mut max_values))?;

Helper Functions

The module also contains internal helper functions in the number, sum, variance, minmax, and stddev submodules that are used by the trait implementations. These are not typically accessed directly by users.

Utility Functions

stddev::whole

Computes the standard deviation of elements along a specified dimension.

pub fn whole(
arrayd: &ArrayData,
direction: Direction
) -> anyhow::Result<Vec<f64>>
Parameters
  • arrayd: Array data
  • direction: Direction for computation (Direction::ROW or Direction::COLUMN)
Returns
  • anyhow::Result<Vec<f64>>: Vector of standard deviations
Example
use single_rust::shared::statistics::stddev;

let std_devs = stddev::whole(&array_data, Direction::COLUMN)?;

Usage Example

use single_rust::memory::statistics::compute_qc_variables;
use single_rust::shared::ComputeSum;
use single_rust::shared::ComputeVariance;
use single_rust::shared::Direction;

// Compute common QC metrics
let qc_stats = compute_qc_variables::<u32, f64>(&adata)?;

// Calculate mean expression per gene
let n_cells = adata.n_obs() as f64;
let means: Vec<f64> = qc_stats.expr_per_gene
.iter()
.map(|&sum| sum / n_cells)
.collect();

// Calculate coefficient of variation (CV) per gene
let cv: Vec<f64> = qc_stats.std_dev_per_gene
.iter()
.zip(means.iter())
.map(|(&std, &mean)| if mean > 0.0 { std / mean } else { 0.0 })
.collect();

// Direct computation using the traits
let matrix = adata.x();
let variances: Vec<f64> = matrix.variance_whole::<u32, f64>(&Direction::COLUMN)?;
let total_counts: Vec<f64> = matrix.sum_whole::<f64>(&Direction::ROW)?;