Construct a ContigCellDB

ContigCellDB(
  contig_tbl,
  contig_pk,
  cell_tbl,
  cell_pk,
  cluster_tbl,
  cluster_pk = character(),
  equalize = TRUE
)

ContigCellDB_10XVDJ(
  contig_tbl,
  contig_pk = c("barcode", "contig_id"),
  cell_pk = "barcode",
  ...
)

Arguments

contig_tbl

a data frame of contigs, and additional fields describing their properties

contig_pk

character vector naming fields in contig_tbl that uniquely identify a row/contig

cell_tbl

a data frame of cell barcodes, and (optional) additional fields describing their properties

cell_pk

character vector naming fields in cell_tbl that uniquely identify a cell barcode

cluster_tbl

A data frame that provides cluster assignments for each contig

cluster_pk

If cluster_tbl was provided, a character vector naming fields in cluster_tbl that uniquely identify a cluster

equalize

logical. Should the contigs, cells and clusters be equalized by taking the intersection of their common keys?

...

passed to ContigCellDB()

Value

ContigCellDB

Functions

  • ContigCellDB_10XVDJ: provide defaults that correspond to identifiers in 10X VDJ data

Accessors/mutators

See $,ContigCellDB-method for more on how to access and mutate slots. See mutate_cdb() and filter_cdb() for endomorphic filtering/mutation methods. See split_cdb() to split into a list, and rbind.ContigCellDB() for the inverse operation.

Examples

data(contigs_qc)
contigs_qc
#> # A tibble: 1,508 × 22
#>    anno_file pop   sample barcode is_cell contig_id high_confidence length chain
#>    <chr>     <chr> <chr>  <chr>   <lgl>   <chr>     <lgl>            <dbl> <chr>
#>  1 /Users/a… b6    4      AAAGTA… TRUE    AAAGTAGT… TRUE               611 TRB  
#>  2 /Users/a… b6    4      AAAGTA… TRUE    AAAGTAGT… TRUE               609 TRB  
#>  3 /Users/a… b6    4      AAAGTA… TRUE    AAAGTAGT… TRUE               538 TRA  
#>  4 /Users/a… b6    4      AACCAT… TRUE    AACCATGC… TRUE               799 TRA  
#>  5 /Users/a… b6    4      AACTGG… TRUE    AACTGGTG… TRUE               634 TRB  
#>  6 /Users/a… b6    4      AACTGG… TRUE    AACTGGTG… TRUE               923 TRA  
#>  7 /Users/a… b6    4      AAGCCG… TRUE    AAGCCGCA… TRUE               693 TRB  
#>  8 /Users/a… b6    4      AAGTCT… TRUE    AAGTCTGG… TRUE               658 TRB  
#>  9 /Users/a… b6    4      AAGTCT… TRUE    AAGTCTGG… TRUE               558 TRA  
#> 10 /Users/a… b6    4      ACACCA… TRUE    ACACCAAA… TRUE               614 TRB  
#> # … with 1,498 more rows, and 13 more variables: v_gene <chr>, d_gene <chr>,
#> #   j_gene <chr>, c_gene <chr>, full_length <lgl>, productive <chr>,
#> #   cdr3 <chr>, cdr3_nt <chr>, reads <dbl>, umis <dbl>, raw_clonotype_id <chr>,
#> #   raw_consensus_id <chr>, celltype <chr>

cdb = ContigCellDB(contigs_qc, contig_pk = c('barcode', 'pop', 'sample', 'contig_id'),
 cell_pk = c('barcode', 'pop', 'sample'))
 cdb
#> ContigCellDB of 1508 contigs; 832 cells; and 0 clusters.
#> Contigs keyed by barcode, pop, sample, contig_id; cells keyed by barcode, pop, sample.

 # everything that was in contigs_qc
 cdb$contig_tbl
#> # A tibble: 1,508 × 22
#>    anno_file pop   sample barcode is_cell contig_id high_confidence length chain
#>    <chr>     <chr> <chr>  <chr>   <lgl>   <chr>     <lgl>            <dbl> <chr>
#>  1 /Users/a… b6    4      AAAGTA… TRUE    AAAGTAGT… TRUE               611 TRB  
#>  2 /Users/a… b6    4      AAAGTA… TRUE    AAAGTAGT… TRUE               609 TRB  
#>  3 /Users/a… b6    4      AAAGTA… TRUE    AAAGTAGT… TRUE               538 TRA  
#>  4 /Users/a… b6    4      AACCAT… TRUE    AACCATGC… TRUE               799 TRA  
#>  5 /Users/a… b6    4      AACTGG… TRUE    AACTGGTG… TRUE               634 TRB  
#>  6 /Users/a… b6    4      AACTGG… TRUE    AACTGGTG… TRUE               923 TRA  
#>  7 /Users/a… b6    4      AAGCCG… TRUE    AAGCCGCA… TRUE               693 TRB  
#>  8 /Users/a… b6    4      AAGTCT… TRUE    AAGTCTGG… TRUE               658 TRB  
#>  9 /Users/a… b6    4      AAGTCT… TRUE    AAGTCTGG… TRUE               558 TRA  
#> 10 /Users/a… b6    4      ACACCA… TRUE    ACACCAAA… TRUE               614 TRB  
#> # … with 1,498 more rows, and 13 more variables: v_gene <chr>, d_gene <chr>,
#> #   j_gene <chr>, c_gene <chr>, full_length <lgl>, productive <chr>,
#> #   cdr3 <chr>, cdr3_nt <chr>, reads <dbl>, umis <dbl>, raw_clonotype_id <chr>,
#> #   raw_consensus_id <chr>, celltype <chr>

 # Only the cell_pk are included by default (until clustering/canonicalization)
 cdb$cell_tbl
#> # A tibble: 832 × 3
#>    barcode            pop   sample
#>    <chr>              <chr> <chr> 
#>  1 AAAGTAGTCGCGCCAA-1 b6    4     
#>  2 AACCATGCATTTGCCC-1 b6    4     
#>  3 AACTGGTGTCTGATCA-1 b6    4     
#>  4 AAGCCGCAGTAAGTAC-1 b6    4     
#>  5 AAGTCTGGTTCAACCA-1 b6    4     
#>  6 ACACCAAAGTCCAGGA-1 b6    4     
#>  7 ACATGGTAGTGTTTGC-1 b6    4     
#>  8 ACCCACTTCCACGACG-1 b6    4     
#>  9 ACGCCAGGTCCGAATT-1 b6    4     
#> 10 ACGCCAGTCCAATGGT-1 b6    4     
#> # … with 822 more rows

 # Empty, since no cluster_pk was specified
 cdb$cluster_tbl
#> # A tibble: 0 × 0

 # Keys
 cdb$contig_pk
#> [1] "barcode"   "pop"       "sample"    "contig_id"
 cdb$cell_pk
#> [1] "barcode" "pop"     "sample" 
 cdb$cluster_pk
#> character(0)