1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
| protocol ADAM {
import idl "adam.avdl";
// Descriptor of a reference contig; ADAMVariant.contig refers to this record.
// Every field is nullable. contigName and referenceName declare no default,
// whereas the remaining fields default to null.
record ADAMContig {
    union { null, string } contigName;

    // Open question from the original author: is the reference file name
    // still needed when referenceURL is also available?
    union { null, string } referenceName;

    union { null, long } referenceLength = null;
    union { null, string } referenceURL = null;
    union { null, string } referenceMD5 = null;
}
// A variant site: the contig and position it is located at, together with
// its reference and variant alleles expressed as arrays of Base.
// NOTE(review): Base is not declared in the visible part of this file;
// presumably it is provided by the imported "adam.avdl" -- confirm.
// NOTE(review): the coordinate convention of `position` (0- or 1-based) is
// not stated here -- confirm against the code that populates it.
record ADAMVariant {
    union { null, ADAMContig } contig;
    union { null, long } position;

    // Neither allele array is nullable and neither declares a default.
    array<Base> referenceAlleles;
    array<Base> variantAlleles;
}
// Per-allele call used as the element type of ADAMGenotype.alleles.
// Avro encodes enum symbols by their position in this list, so the order
// below must not be changed.
enum ADAMGenotypeAllele {
    Ref,
    Alt,
    NoCall
}
// Holds the statistics that a VCF stores outside of any individual sample
// but that are nevertheless computed from the samples. MAPQ0, for example,
// is an aggregate over all samples and is written to the INFO line.
record VariantStatistics {
    union { null, double } baseQRankSum;
    union { null, double } clippingRankSum;
    union { null, int } readDepth;
    union { null, boolean } downsampled;

    // Phred-scaled.
    union { null, double } fisherStrandBiasPValue;

    union { null, float } haplotypeScore;
    union { null, float } inbreedingCoefficient;

    array<int> alleleCountMLE;
    // NOTE(review): declared as array<int> even though the name says
    // "frequency"; confirm that integer elements are intended here.
    array<int> alleleFrequencyMLE;

    union { null, float } rmsMapQ;
    union { null, int } mapq0reads;
    union { null, float } mqranksum;
    union { null, boolean } usedForNegativeTrainingSet;
    union { null, boolean } usedForPositiveTrainingSet;
    union { null, float } variantQualityByDepth;
    union { null, float } readPositionRankSum;

    // Log odds ratio of being a true vs. false variant under the trained
    // Gaussian mixture model.
    union { null, float } vqslod;
    union { null, string } culprit;
}
// Genotype record for one sample at one variant. It combines variant-level
// data shared by all concurrently genotyped samples with data specific to
// the sample identified by sampleId.
record ADAMGenotype {
    union { null, ADAMVariant } variant;

    // Variant-level "provenance" data, i.e. data shared amongst all
    // concurrently genotyped samples.
    union { null, VariantStatistics } variantStats;
    // null implies no filters were applied.
    union { null, boolean } varIsFiltered = null;
    // Not a nullable union, so the default has to be an array value: null is
    // not a valid Avro default for an array-typed field and is rejected by
    // schema parsers that validate defaults.
    array<string> varFilters = [];

    // Sample-level data, i.e. data specific to this particular sample.
    union { null, string } sampleId = null;
    // Length is equal to the ploidy. The default is an empty array because
    // null is not a valid default for an array-typed field.
    array<ADAMGenotypeAllele> alleles = [];
    union { null, int } referenceReadDepth;
    union { null, int } alternateReadDepth;
    union { null, int } readDepth;
    union { null, string } filter;
    union { null, int } genotypeQuality;

    // In the ADAM world we split multiallelic VCF lines into multiple
    // single-alternate records. This bit is set if that happened for this
    // record.
    union { null, boolean } splitFromMultiAllelic;

    // Phred-scaled. Always length 3 since we are not multiallelic.
    array<int> genotypeLikelihoods;

    // Phasing information; all three fields default to null.
    union { null, boolean } isPhased = null;
    union { null, string } phaseSetId = null;
    union { null, int } phaseQuality = null;
}
// Database-derived annotation attached to a variant. The only annotation
// declared so far is a nullable integer dbSNP identifier.
record ADAMDatabaseVariantAnnotation {
    union { null, ADAMVariant } variant;

    // Defaults to null when no identifier is supplied.
    union { null, int } dbsnpId = null;
}
}
|