Statistics Module 📊
API reference for the memory::statistics
module, which provides statistical operations for single-cell RNA-seq data analysis.
Core Statistics Functions
compute_qc_variables
Computes quality control (QC) statistics for an AnnData object's expression matrix.
pub fn compute_qc_variables<I, T>(
adata: &IMAnnData
) -> anyhow::Result<StatisticsContainer<I, T>>
where
I: PrimInt + Unsigned + Zero + AddAssign,
T: Float + NumCast + AddAssign + Sum + From<I>
Type Parameters
I
: Integer type for counting non-zero elements (typicallyu32
)T
: Floating-point type for numerical computations (typicallyf64
)
Parameters
adata
: Reference to an IMAnnData object containing the expression matrix
Returns
anyhow::Result<StatisticsContainer<I, T>>
: Container with calculated statistics
Example
let qc_stats = compute_qc_variables::<u32, f64>(&adata)?;
// Access the computed statistics
let genes_per_cell = &qc_stats.num_per_cell;
let cells_per_gene = &qc_stats.num_per_gene;
let total_expr_per_gene = &qc_stats.expr_per_gene;
StatisticsContainer
A structure that holds various statistical metrics computed from expression data.
pub struct StatisticsContainer<I, T>
where
I: PrimInt + Unsigned + Zero + AddAssign,
T: Float + NumCast + AddAssign + Sum + From<I>
{
pub num_per_gene: Vec<I>, // Number of cells expressing each gene
pub expr_per_gene: Vec<T>, // Total expression per gene
pub num_per_cell: Vec<I>, // Number of genes expressed per cell
pub expr_per_cell: Vec<T>, // Total expression per cell
pub variance_per_gene: Vec<T>, // Expression variance per gene
pub variance_per_cell: Vec<T>, // Expression variance per cell
pub std_dev_per_gene: Vec<T>, // Expression standard deviation per gene
pub std_dev_per_cell: Vec<T>, // Expression standard deviation per cell
}
Trait Implementations
The statistics module implements several traits that provide statistical operations for different data types. These traits are implemented for IMArrayElement
(in-memory array elements).
ComputeNonZero
Computes the number of non-zero elements along a specified dimension.
nonzero_whole
fn nonzero_whole<T>(
&self,
direction: &Direction
) -> anyhow::Result<Vec<T>>
where
T: PrimInt + Unsigned + Zero + AddAssign
Parameters
direction
: Direction for computation (Direction::ROW
orDirection::COLUMN
)
Returns
anyhow::Result<Vec<T>>
: Vector of counts
Example
// Count non-zero elements per cell
let genes_per_cell = matrix.nonzero_whole::<u32>(&Direction::ROW)?;
// Count non-zero elements per gene
let cells_per_gene = matrix.nonzero_whole::<u32>(&Direction::COLUMN)?;
nonzero_chunk
fn nonzero_chunk<T>(
&self,
direction: &Direction,
reference: &mut [T]
) -> anyhow::Result<()>
where
T: PrimInt + Unsigned + Zero + AddAssign
Parameters
direction
: Direction for computation (Direction::ROW
orDirection::COLUMN
)reference
: Pre-allocated vector to store results
Returns
anyhow::Result<()>
: Success or error
Example
let mut counts = vec![0u32; adata.n_obs()];
matrix.nonzero_chunk::<u32>(&Direction::ROW, &mut counts)?;
ComputeSum
Computes the sum of elements along a specified dimension.
sum_whole
fn sum_whole<T>(
&self,
direction: &Direction
) -> anyhow::Result<Vec<T>>
where
T: Float + NumCast + AddAssign + Sum
Parameters
direction
: Direction for computation (Direction::ROW
orDirection::COLUMN
)
Returns
anyhow::Result<Vec<T>>
: Vector of sums
Example
// Calculate total expression per cell
let expr_per_cell = matrix.sum_whole::<f64>(&Direction::ROW)?;
// Calculate total expression per gene
let expr_per_gene = matrix.sum_whole::<f64>(&Direction::COLUMN)?;
sum_chunk
fn sum_chunk<T>(
&self,
direction: &Direction,
reference: &mut [T]
) -> anyhow::Result<()>
where
T: Float + NumCast + AddAssign + Sum
Parameters
direction
: Direction for computation (Direction::ROW
orDirection::COLUMN
)reference
: Pre-allocated vector to store results
Returns
anyhow::Result<()>
: Success or error
Example
let mut totals = vec![0.0f64; adata.n_obs()];
matrix.sum_chunk::<f64>(&Direction::ROW, &mut totals)?;
ComputeVariance
Computes the variance of elements along a specified dimension.
variance_whole
fn variance_whole<I, T>(
&self,
direction: &Direction
) -> anyhow::Result<Vec<T>>
where
I: PrimInt + Unsigned + Zero + AddAssign + Into<T>,
T: Float + NumCast + AddAssign + Sum
Type Parameters
I
: Integer type for counting (typicallyu32
)T
: Floating-point type for computations (typicallyf64
)
Parameters
direction
: Direction for computation (Direction::ROW
orDirection::COLUMN
)
Returns
anyhow::Result<Vec<T>>
: Vector of variances
Example
// Calculate expression variance per gene
let var_per_gene = matrix.variance_whole::<u32, f64>(&Direction::COLUMN)?;
variance_chunk
fn variance_chunk<I, T>(
&self,
direction: &Direction,
reference: &mut [T]
) -> anyhow::Result<()>
where
I: PrimInt + Unsigned + Zero + AddAssign + Into<T>,
T: Float + NumCast + AddAssign + Sum
Type Parameters
I
: Integer type for counting (typicallyu32
)T
: Floating-point type for computations (typicallyf64
)
Parameters
direction
: Direction for computation (Direction::ROW
orDirection::COLUMN
)reference
: Pre-allocated vector to store results
Returns
anyhow::Result<()>
: Success or error
Example
let mut var_values = vec![0.0f64; adata.n_vars()];
matrix.variance_chunk::<u32, f64>(&Direction::COLUMN, &mut var_values)?;
ComputeMinMax
Computes the minimum and maximum values along a specified dimension.
min_max_whole
fn min_max_whole<T>(
&self,
direction: &Direction
) -> anyhow::Result<(Vec<T>, Vec<T>)>
where
T: NumCast + Copy + PartialOrd + NumericOps
Parameters
direction
: Direction for computation (Direction::ROW
orDirection::COLUMN
)
Returns
anyhow::Result<(Vec<T>, Vec<T>)>
: Pair of vectors (min values, max values)
Example
// Get min and max expression values per gene
let (min_per_gene, max_per_gene) = matrix.min_max_whole::<f64>(&Direction::COLUMN)?;
min_max_chunk
fn min_max_chunk<T>(
&self,
direction: &Direction,
reference: (&mut Vec<T>, &mut Vec<T>)
) -> anyhow::Result<()>
where
T: NumCast + Copy + PartialOrd + NumericOps
Parameters
direction
: Direction for computation (Direction::ROW
orDirection::COLUMN
)reference
: Pair of pre-allocated vectors to store results (min values, max values)
Returns
anyhow::Result<()>
: Success or error
Example
let mut min_values = vec![f64::INFINITY; adata.n_vars()];
let mut max_values = vec![f64::NEG_INFINITY; adata.n_vars()];
matrix.min_max_chunk::<f64>(&Direction::COLUMN, (&mut min_values, &mut max_values))?;
Helper Functions
The module also contains internal helper functions in the number
, sum
, variance
, minmax
, and stddev
submodules that are used by the trait implementations. These are not typically accessed directly by users.
Utility Functions
stddev::whole
Computes the standard deviation of elements along a specified dimension.
pub fn whole(
arrayd: &ArrayData,
direction: Direction
) -> anyhow::Result<Vec<f64>>
Parameters
arrayd
: Array datadirection
: Direction for computation (Direction::ROW
orDirection::COLUMN
)
Returns
anyhow::Result<Vec<f64>>
: Vector of standard deviations
Example
use single_rust::shared::statistics::stddev;
let std_devs = stddev::whole(&array_data, Direction::COLUMN)?;
Usage Example
use single_rust::memory::statistics::compute_qc_variables;
use single_rust::shared::ComputeSum;
use single_rust::shared::ComputeVariance;
use single_rust::shared::Direction;
// Compute common QC metrics
let qc_stats = compute_qc_variables::<u32, f64>(&adata)?;
// Calculate mean expression per gene
let n_cells = adata.n_obs() as f64;
let means: Vec<f64> = qc_stats.expr_per_gene
.iter()
.map(|&sum| sum / n_cells)
.collect();
// Calculate coefficient of variation (CV) per gene
let cv: Vec<f64> = qc_stats.std_dev_per_gene
.iter()
.zip(means.iter())
.map(|(&std, &mean)| if mean > 0.0 { std / mean } else { 0.0 })
.collect();
// Direct computation using the traits
let matrix = adata.x();
let variances: Vec<f64> = matrix.variance_whole::<u32, f64>(&Direction::COLUMN)?;
let total_counts: Vec<f64> = matrix.sum_whole::<f64>(&Direction::ROW)?;