For each data point, label a mutation as "ref", "alt", "ins", or "del".
Data points that have more reference (REF) than alternate (ALT) calls will be labeled as "ref".
Mutations whose REF and ALT calls are both one base pair and that have more ALT calls than REF calls will be labeled as "alt".
Mutations whose ALT call is longer than their REF call and that have more ALT calls than REF calls will be labeled as "ins".
Mutations whose REF call is longer than their ALT call and that have more ALT calls than REF calls will be labeled as "del".
Examples
data <- tibble::tribble(
~sample, ~pos, ~ref, ~alt, ~ref_umi_count, ~alt_umi_count, ~coverage,
"S1", "1049838", "A", "G", 54, 10, 64,
"S2", "801498", "G", "A", 15, 0, 15,
"S3", "625403", "T", "C", 0, 15, 15,
"S4", "748165", "GA", "G", 2, 18, 20,
"S5", "487199", "G", "ATC", 0, 10, 10
)
label_mutations(data)
#> # A tibble: 5 × 8
#> sample pos ref alt ref_umi_count alt_umi_count coverage ans_der_indel
#> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr>
#> 1 S1 1049838 A G 54 10 64 ref
#> 2 S2 801498 G A 15 0 15 ref
#> 3 S3 625403 T C 0 15 15 alt
#> 4 S4 748165 GA G 2 18 20 del
#> 5 S5 487199 G ATC 0 10 10 ins
label_mutations(data, .after = alt)
#> # A tibble: 5 × 8
#> sample pos ref alt ans_der_indel ref_umi_count alt_umi_count coverage
#> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 S1 1049838 A G ref 54 10 64
#> 2 S2 801498 G A ref 15 0 15
#> 3 S3 625403 T C alt 0 15 15
#> 4 S4 748165 GA G del 2 18 20
#> 5 S5 487199 G ATC ins 0 10 10
label_mutations(data, .before = pos)
#> # A tibble: 5 × 8
#> sample ans_der_indel pos ref alt ref_umi_count alt_umi_count coverage
#> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 S1 ref 1049838 A G 54 10 64
#> 2 S2 ref 801498 G A 15 0 15
#> 3 S3 alt 625403 T C 0 15 15
#> 4 S4 del 748165 GA G 2 18 20
#> 5 S5 ins 487199 G ATC 0 10 10