R/collapseFullCoverage.R
collapseFullCoverage.Rd
For a given data set, this function collapses the full coverage information for each sample from all the chromosomes. The resulting information per sample is the number of bases with coverage 0, 1, etc. It is similar to using table() on a regular vector. This information is then used by sampleDepth for calculating the sample depth adjustments. The data set can be loaded into R using fullCoverage and optionally filtered using filterData.
collapseFullCoverage(fullCov, colsubset = NULL, save = FALSE, ...)
A list where each element is the result from
loadCoverage used with cutoff=NULL
. Can be
generated using fullCoverage.
Which columns of coverageInfo$coverage
to use.
If TRUE
, the result is saved as 'collapsedFull.Rdata'.
Arguments passed to other methods and/or advanced arguments. Advanced arguments:
verbose: If TRUE
basic status updates will be printed along
the way. Default: FALSE
.
A list with one element per sample. Then per sample, a list with two vector
elements: values
and weights
. The first one is the coverage
value and the second one is the number of bases with that value.
## Collapse the coverage information for the filtered data
collapsedFull <- collapseFullCoverage(list(genomeData),
verbose = TRUE
)
#> 2023-05-07 06:01:10.425638 collapseFullCoverage: Sorting fullCov
#> 2023-05-07 06:01:10.428074 collapseFullCoverage: Collapsing chromosomes information by sample
collapsedFull
#> $ERR009101
#> $ERR009101$values
#> [1] 0 1 2
#>
#> $ERR009101$weights
#> [1] 1105 298 31
#>
#>
#> $ERR009102
#> $ERR009102$values
#> [1] 0 1 2
#>
#> $ERR009102$weights
#> [1] 1216 216 2
#>
#>
#> $ERR009105
#> $ERR009105$values
#> [1] 0 1 2 3 4 5 6 7
#>
#> $ERR009105$weights
#> [1] 726 226 204 67 52 93 38 28
#>
#>
#> $ERR009107
#> $ERR009107$values
#> [1] 0 1 2 3 4 5 6
#>
#> $ERR009107$weights
#> [1] 1156 125 115 2 3 17 16
#>
#>
#> $ERR009108
#> $ERR009108$values
#> [1] 0 1 2 3
#>
#> $ERR009108$weights
#> [1] 1193 74 131 36
#>
#>
#> $ERR009112
#> $ERR009112$values
#> [1] 0 1 2
#>
#> $ERR009112$weights
#> [1] 1161 258 15
#>
#>
#> $ERR009115
#> $ERR009115$values
#> [1] 0 1
#>
#> $ERR009115$weights
#> [1] 1396 38
#>
#>
#> $ERR009116
#> $ERR009116$values
#> [1] 0 1 2
#>
#> $ERR009116$weights
#> [1] 1120 269 45
#>
#>
#> $ERR009131
#> $ERR009131$values
#> [1] 0 1 2
#>
#> $ERR009131$weights
#> [1] 1299 122 13
#>
#>
#> $ERR009138
#> $ERR009138$values
#> [1] 0 1 2 3
#>
#> $ERR009138$weights
#> [1] 1089 306 25 14
#>
#>
#> $ERR009144
#> $ERR009144$values
#> [1] 0 1 2
#>
#> $ERR009144$weights
#> [1] 1097 232 105
#>
#>
#> $ERR009145
#> $ERR009145$values
#> [1] 0 1
#>
#> $ERR009145$weights
#> [1] 1360 74
#>
#>
#> $ERR009148
#> $ERR009148$values
#> [1] 0 1 2 3
#>
#> $ERR009148$weights
#> [1] 1212 133 76 13
#>
#>
#> $ERR009151
#> $ERR009151$values
#> [1] 0 1 2 3 4 5
#>
#> $ERR009151$weights
#> [1] 972 186 172 62 38 4
#>
#>
#> $ERR009152
#> $ERR009152$values
#> [1] 0 1 2
#>
#> $ERR009152$weights
#> [1] 1059 232 143
#>
#>
#> $ERR009153
#> $ERR009153$values
#> [1] 0 1 2 3 4
#>
#> $ERR009153$weights
#> [1] 1104 275 45 9 1
#>
#>
#> $ERR009159
#> $ERR009159$values
#> [1] 0 1 2 3 4 5
#>
#> $ERR009159$weights
#> [1] 975 169 154 63 69 4
#>
#>
#> $ERR009161
#> $ERR009161$values
#> [1] 0 1 2 3 4
#>
#> $ERR009161$weights
#> [1] 1243 96 61 30 4
#>
#>
#> $ERR009163
#> $ERR009163$values
#> [1] 0 1
#>
#> $ERR009163$weights
#> [1] 1360 74
#>
#>
#> $ERR009164
#> $ERR009164$values
#> [1] 0 1 2 3
#>
#> $ERR009164$weights
#> [1] 1336 32 46 20
#>
#>
#> $ERR009167
#> $ERR009167$values
#> [1] 0 1 2 3 4 5 6 7 8 9
#>
#> $ERR009167$weights
#> [1] 728 200 181 140 79 17 25 23 24 17
#>
#>
#> $SRR031812
#> $SRR031812$values
#> [1] 0 1
#>
#> $SRR031812$weights
#> [1] 1399 35
#>
#>
#> $SRR031835
#> $SRR031835$values
#> [1] 0 1
#>
#> $SRR031835$weights
#> [1] 1388 46
#>
#>
#> $SRR031867
#> $SRR031867$values
#> [1] 0
#>
#> $SRR031867$weights
#> [1] 1434
#>
#>
#> $SRR031868
#> $SRR031868$values
#> [1] 0
#>
#> $SRR031868$weights
#> [1] 1434
#>
#>
#> $SRR031900
#> $SRR031900$values
#> [1] 0
#>
#> $SRR031900$weights
#> [1] 1434
#>
#>
#> $SRR031904
#> $SRR031904$values
#> [1] 0 1
#>
#> $SRR031904$weights
#> [1] 1388 46
#>
#>
#> $SRR031914
#> $SRR031914$values
#> [1] 0
#>
#> $SRR031914$weights
#> [1] 1434
#>
#>
#> $SRR031936
#> $SRR031936$values
#> [1] 0
#>
#> $SRR031936$weights
#> [1] 1434
#>
#>
#> $SRR031958
#> $SRR031958$values
#> [1] 0
#>
#> $SRR031958$weights
#> [1] 1434
#>
#>
#> $SRR031960
#> $SRR031960$values
#> [1] 0
#>
#> $SRR031960$weights
#> [1] 1434
#>
#>
if (FALSE) {
## You can also collapse the raw data
collapsedFullRaw <- collapseFullCoverage(list(genomeDataRaw), verbose = TRUE)
}