3 Objetos de Bioconductor para datos de expresión

3.1 SummarizedExperiment

Original notes in English

## Let's build our first SummarizedExperiment object
library("SummarizedExperiment")
## ?SummarizedExperiment

## Adapted from the examples in the official help page

## Create the data for our SummarizedExperiment object:
## 200 genes across 6 samples
nrows <- 200
ncols <- 6
## Random count values; the fixed seed keeps the example reproducible
set.seed(20210223)
counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
## Information about our genes. Every per-gene vector is sized from `nrows`
## (instead of a repeated literal 200) so the pieces cannot drift out of sync.
## The first 50 genes go on chr1 and the rest on chr2, so `nrows` must be >= 50.
rowRanges <- GRanges(
    rep(c("chr1", "chr2"), c(50, nrows - 50)),
    IRanges(floor(runif(nrows, 1e5, 1e6)), width = 100),
    strand = sample(c("+", "-"), nrows, replace = TRUE),
    feature_id = sprintf("ID%03d", seq_len(nrows))
)
names(rowRanges) <- paste0("gene_", seq_len(length(rowRanges)))
## Information about our samples, sized from `ncols`; treatments alternate
## ChIP / Input and samples are labelled with the first `ncols` capital letters
colData <- DataFrame(
    Treatment = rep(c("ChIP", "Input"), length.out = ncols),
    row.names = LETTERS[seq_len(ncols)]
)
## Now combine all the information into a single R object
rse <- SummarizedExperiment(
    assays = SimpleList(counts = counts),
    rowRanges = rowRanges,
    colData = colData
)

## Let's explore the resulting object
rse
## class: RangedSummarizedExperiment 
## dim: 200 6 
## metadata(0):
## assays(1): counts
## rownames(200): gene_1 gene_2 ... gene_199 gene_200
## rowData names(1): feature_id
## colnames(6): A B ... E F
## colData names(1): Treatment
## Number of genes and samples
dim(rse)
## [1] 200   6
## IDs of our genes and samples
dimnames(rse)
## [[1]]
##   [1] "gene_1"   "gene_2"   "gene_3"   "gene_4"   "gene_5"   "gene_6"   "gene_7"   "gene_8"   "gene_9"   "gene_10" 
##  [11] "gene_11"  "gene_12"  "gene_13"  "gene_14"  "gene_15"  "gene_16"  "gene_17"  "gene_18"  "gene_19"  "gene_20" 
##  [21] "gene_21"  "gene_22"  "gene_23"  "gene_24"  "gene_25"  "gene_26"  "gene_27"  "gene_28"  "gene_29"  "gene_30" 
##  [31] "gene_31"  "gene_32"  "gene_33"  "gene_34"  "gene_35"  "gene_36"  "gene_37"  "gene_38"  "gene_39"  "gene_40" 
##  [41] "gene_41"  "gene_42"  "gene_43"  "gene_44"  "gene_45"  "gene_46"  "gene_47"  "gene_48"  "gene_49"  "gene_50" 
##  [51] "gene_51"  "gene_52"  "gene_53"  "gene_54"  "gene_55"  "gene_56"  "gene_57"  "gene_58"  "gene_59"  "gene_60" 
##  [61] "gene_61"  "gene_62"  "gene_63"  "gene_64"  "gene_65"  "gene_66"  "gene_67"  "gene_68"  "gene_69"  "gene_70" 
##  [71] "gene_71"  "gene_72"  "gene_73"  "gene_74"  "gene_75"  "gene_76"  "gene_77"  "gene_78"  "gene_79"  "gene_80" 
##  [81] "gene_81"  "gene_82"  "gene_83"  "gene_84"  "gene_85"  "gene_86"  "gene_87"  "gene_88"  "gene_89"  "gene_90" 
##  [91] "gene_91"  "gene_92"  "gene_93"  "gene_94"  "gene_95"  "gene_96"  "gene_97"  "gene_98"  "gene_99"  "gene_100"
## [101] "gene_101" "gene_102" "gene_103" "gene_104" "gene_105" "gene_106" "gene_107" "gene_108" "gene_109" "gene_110"
## [111] "gene_111" "gene_112" "gene_113" "gene_114" "gene_115" "gene_116" "gene_117" "gene_118" "gene_119" "gene_120"
## [121] "gene_121" "gene_122" "gene_123" "gene_124" "gene_125" "gene_126" "gene_127" "gene_128" "gene_129" "gene_130"
## [131] "gene_131" "gene_132" "gene_133" "gene_134" "gene_135" "gene_136" "gene_137" "gene_138" "gene_139" "gene_140"
## [141] "gene_141" "gene_142" "gene_143" "gene_144" "gene_145" "gene_146" "gene_147" "gene_148" "gene_149" "gene_150"
## [151] "gene_151" "gene_152" "gene_153" "gene_154" "gene_155" "gene_156" "gene_157" "gene_158" "gene_159" "gene_160"
## [161] "gene_161" "gene_162" "gene_163" "gene_164" "gene_165" "gene_166" "gene_167" "gene_168" "gene_169" "gene_170"
## [171] "gene_171" "gene_172" "gene_173" "gene_174" "gene_175" "gene_176" "gene_177" "gene_178" "gene_179" "gene_180"
## [181] "gene_181" "gene_182" "gene_183" "gene_184" "gene_185" "gene_186" "gene_187" "gene_188" "gene_189" "gene_190"
## [191] "gene_191" "gene_192" "gene_193" "gene_194" "gene_195" "gene_196" "gene_197" "gene_198" "gene_199" "gene_200"
## 
## [[2]]
## [1] "A" "B" "C" "D" "E" "F"
## Names of the count tables (assays) we have (RPKM, CPM, counts, logcounts, etc)
assayNames(rse)
## [1] "counts"
## The first rows of our count table
head(assay(rse))
##               A        B         C         D         E        F
## gene_1 2577.960 8526.615 2226.3070 3615.8967 1723.8851 3267.954
## gene_2 7793.183 3462.579  478.2716 7688.3839  295.2813 2698.921
## gene_3 9571.769 5280.564 9772.1671 9916.0076 2621.2085 6880.067
## gene_4 4641.969 2784.091 6670.6757  258.5218 2771.8970 8737.586
## gene_5 6436.758 7053.276 9978.8199 3588.1194 1447.9821 7290.890
## gene_6 6845.704 1502.045 4383.5686 9750.8286 3529.3153 2192.060
## Gene information as a Bioconductor object
rowRanges(rse)
## GRanges object with 200 ranges and 1 metadata column:
##            seqnames        ranges strand |  feature_id
##               <Rle>     <IRanges>  <Rle> | <character>
##     gene_1     chr1 286235-286334      + |       ID001
##     gene_2     chr1 586770-586869      - |       ID002
##     gene_3     chr1 577897-577996      + |       ID003
##     gene_4     chr1 494350-494449      + |       ID004
##     gene_5     chr1 686692-686791      - |       ID005
##        ...      ...           ...    ... .         ...
##   gene_196     chr2 804998-805097      - |       ID196
##   gene_197     chr2 177462-177561      - |       ID197
##   gene_198     chr2 649993-650092      - |       ID198
##   gene_199     chr2 275940-276039      - |       ID199
##   gene_200     chr2 487418-487517      + |       ID200
##   -------
##   seqinfo: 2 sequences from an unspecified genome; no seqlengths
## Table with the gene information
rowData(rse) # identical to 'mcols(rowRanges(rse))'
## DataFrame with 200 rows and 1 column
##           feature_id
##          <character>
## gene_1         ID001
## gene_2         ID002
## gene_3         ID003
## gene_4         ID004
## gene_5         ID005
## ...              ...
## gene_196       ID196
## gene_197       ID197
## gene_198       ID198
## gene_199       ID199
## gene_200       ID200
## Table with the sample information
colData(rse)
## DataFrame with 6 rows and 1 column
##     Treatment
##   <character>
## A        ChIP
## B       Input
## C        ChIP
## D       Input
## E        ChIP
## F       Input

3.2 Ejercicio

Explica qué sucede en las siguientes líneas de código de R.

## Command 1
rse[1:2, ]
## class: RangedSummarizedExperiment 
## dim: 2 6 
## metadata(0):
## assays(1): counts
## rownames(2): gene_1 gene_2
## rowData names(1): feature_id
## colnames(6): A B ... E F
## colData names(1): Treatment
## Command 2
rse[, c("A", "D", "F")]
## class: RangedSummarizedExperiment 
## dim: 200 3 
## metadata(0):
## assays(1): counts
## rownames(200): gene_1 gene_2 ... gene_199 gene_200
## rowData names(1): feature_id
## colnames(3): A D F
## colData names(1): Treatment

3.4 Ejercicio con spatialLIBD

## Let's download some data from spatialLIBD
sce_layer <- spatialLIBD::fetch_data("sce_layer")
## snapshotDate(): 2021-10-19
## adding rname 'https://www.dropbox.com/s/bg8xwysh2vnjwvg/Human_DLPFC_Visium_processedData_sce_scran_sce_layer_spatialLIBD.Rdata?dl=1'
## 2022-02-04 17:18:34 loading file /github/home/.cache/R/BiocFileCache/48c2ec1cfa8_Human_DLPFC_Visium_processedData_sce_scran_sce_layer_spatialLIBD.Rdata%3Fdl%3D1
## Print a summary of the downloaded object
sce_layer
## class: SingleCellExperiment 
## dim: 22331 76 
## metadata(0):
## assays(2): counts logcounts
## rownames(22331): ENSG00000243485 ENSG00000238009 ... ENSG00000278384 ENSG00000271254
## rowData names(10): source type ... is_top_hvg is_top_hvg_sce_layer
## colnames(76): 151507_Layer1 151507_Layer2 ... 151676_Layer6 151676_WM
## colData names(13): sample_name layer_guess ... layer_guess_reordered_short spatialLIBD
## reducedDimNames(6): PCA TSNE_perplexity5 ... UMAP_neighbors15 PCAsub
## mainExpName: NULL
## altExpNames(0):
## Check the in-memory size of this object, in MB.
## The value returned by obj_size() keeps its byte class through the division,
## so it would print with a "B" suffix even though the number is already in MB.
## Converting to a plain number first avoids that misleading unit.
as.numeric(lobstr::obj_size(sce_layer)) / 1024^2
## [1] 32.41268
  • Al igual que con nuestro objeto rse, podemos usar iSEE::iSEE() para explorar los datos.
## Explore the data with iSEE (presumably opens an interactive session; run this by hand)
iSEE::iSEE(sce_layer)
  • Descarga un PDF que reproduzca la imagen del lado derecho de la siguiente diapositiva. Incluye ese PDF en tu repositorio de notas del curso.
  • Explora con un heatmap la expresión de los genes MOBP, MBP y PCP4. Si hacemos un clustering (agrupamos los genes), ¿cuáles genes se parecen más?
  • ¿En qué capas se expresan más los genes MOBP y MBP?
ENSG00000168314
ENSG00000183036
ENSG00000197971

3.5 Comunidad

Autores de iSEE:

© 2011-2020. All thoughts and opinions here are my own. The icon was designed by Mauricio Guzmán and is inspired by Huichol culture; it represents my community building interests.

Published with Bookdown