R/utils.R
sparse_cor.Rd
Safe correlation function which returns a sparse matrix without missing values
sparse_cor(
x,
y = NULL,
method = "pearson",
allow_neg = TRUE,
remove_na = TRUE,
remove_inf = TRUE,
...
)
Sparse matrix or character vector.
Sparse matrix or character vector. Optional; if NULL (the default), correlations are computed among the columns of x.
Method to use for calculating the correlation coefficient.
Logical. Whether to allow negative values; if FALSE, negative values are set to 0.
Logical. Whether to replace NA values with 0.
Logical. Whether to replace infinite values with 1.
Other arguments passed to the cor function.
A sparse correlation matrix.
m1 <- simulate_sparse_matrix(
1000, 1000,
density = 0.01
)
m2 <- simulate_sparse_matrix(
1000, 500,
density = 0.01
)
all.equal(
as.matrix(sparse_cor(m1)),
cor(as_matrix(m1))
)
#> [1] TRUE
all.equal(
as.matrix(sparse_cor(m1, m2)),
cor(as_matrix(m1), as_matrix(m2))
)
#> [1] TRUE
system.time(
sparse_cor(m1)
)
#> user system elapsed
#> 0.038 0.011 0.049
system.time(
cor(as_matrix(m1))
)
#> user system elapsed
#> 1.065 0.002 1.066
system.time(
sparse_cor(m1, m2)
)
#> user system elapsed
#> 0.014 0.042 0.014
system.time(
cor(as_matrix(m1), as_matrix(m2))
)
#> user system elapsed
#> 0.971 0.000 0.971
# add missing values
m1[sample(1:500, 10)] <- NA
m2[sample(1:500, 10)] <- NA
sparse_cor(m1, m2)[1:5, 1:5]
#> 5 x 5 sparse Matrix of class "dgCMatrix"
#> gene_1 gene_2 gene_3 gene_4 gene_5
#> gene_1 . . . . .
#> gene_2 . -0.005809341 -0.005139148 -0.006487946 -0.003243621
#> gene_3 . -0.010749484 -0.009509372 -0.012005161 -0.006001929
#> gene_4 . -0.009950452 -0.008802521 -0.011112792 -0.005555794
#> gene_5 . -0.007806599 -0.006905993 -0.008718510 -0.004358782