| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
 | protocol ADAM {
import idl "adam.avdl";
// Describes a contig together with optional metadata about the reference it
// belongs to.  Every field is a nullable union.  contigName and referenceName
// declare no default, while the remaining fields default to null.
record ADAMContig {
  // Name of the contig.
  union { null, string } contigName;
  // Do we need the reference filename if we also have URL?
  union { null, string } referenceName;
  // Length of the reference.  NOTE(review): presumably counted in bases -- confirm.
  union { null, long }   referenceLength = null;
  // URL of the reference.
  union { null, string } referenceURL = null;
  // MD5 of the reference.  NOTE(review): the encoding (e.g. hex string) and
  // what exactly is hashed are not visible here -- confirm.
  union { null, string } referenceMD5 = null;
}
// A variant: a contig, a position on it, and the reference and variant
// alleles as arrays of Base.
// NOTE(review): Base is not declared in this section; presumably it comes
// from the imported "adam.avdl" -- confirm.
record ADAMVariant {
  // Contig the variant is located on (nullable, no default declared).
  union { null, ADAMContig } contig;
  // Position of the variant on the contig (nullable, no default declared).
  // NOTE(review): 0-based vs 1-based is not stated here -- confirm.
  union { null, long }       position;
  // Bases of the reference allele.
  array<Base> referenceAlleles;
  // Bases of the variant allele.
  array<Base> variantAlleles;
}
// Per-allele call used by ADAMGenotype.alleles.
// NOTE(review): presumably Ref = reference allele, Alt = alternate allele and
// NoCall = no call could be made -- confirm.
// Keep the symbol order: Avro encodes an enum value by its position.
enum ADAMGenotypeAllele {
  Ref,
  Alt,
  NoCall
}
// This record represents all stats that, inside a VCF, are stored outside of the
// sample but are computed based on the samples.  For instance, MAPQ0 is an aggregate
// stat computed from all samples and stored inside the INFO line.
//
// All scalar fields are nullable unions with no default declared; the two
// array fields are non-nullable.
// NOTE(review): the field names appear to mirror VCF INFO annotations as
// emitted by variant callers (rank-sum tests, strand bias, VQSR output, ...);
// confirm the exact source annotation for each field before relying on it.
record VariantStatistics {
  union { null, double } baseQRankSum;
  union { null, double } clippingRankSum;
  union { null, int } readDepth;
  union { null, boolean } downsampled;
  union { null, double } fisherStrandBiasPValue; // Phred-scaled.
  union { null, float } haplotypeScore;
  union { null, float } inbreedingCoefficient;
  array<int> alleleCountMLE;
  // NOTE(review): declared as array<int> although a frequency is normally a
  // fractional value -- confirm whether array<float> was intended.
  array<int> alleleFrequencyMLE;
  union { null, float } rmsMapQ;
  union { null, int } mapq0reads;
  union { null, float } mqranksum;
  union { null, boolean } usedForNegativeTrainingSet;
  union { null, boolean } usedForPositiveTrainingSet;
  union { null, float } variantQualityByDepth;
  union { null, float } readPositionRankSum;
  union { null, float } vqslod; // log odds ratio of being a true vs false variant under trained gaussian mixture model
  // NOTE(review): presumably the annotation that contributed most to a poor
  // vqslod score -- confirm.
  union { null, string } culprit;
}
// A genotype call for one sample at one variant.  The record carries the
// variant itself, variant-level data shared by all samples genotyped together,
// and the sample-specific call with its supporting statistics.
//
// Field order is part of the Avro binary layout and must not be changed.
record ADAMGenotype {
  // The variant this genotype refers to (nullable, no default declared).
  union { null, ADAMVariant } variant;

  // Variant-level "provenance" data, i.e. data shared amongst all concurrently genotyped samples
  union { null, VariantStatistics } variantStats;
  union { null, boolean } varIsFiltered = null;  // "null" implies no filters were applied
  // Names of the variant-level filters.  The default is an empty array: an
  // array-typed field cannot take null as its default, because Avro requires
  // the default value to match the field's schema type.
  array<string> varFilters = [];

  // Sample-level data, i.e. data specific to this particular sample
  union { null, string } sampleId = null;
  // Length is equal to the ploidy.  Defaults to an empty array for the same
  // reason as varFilters.
  array<ADAMGenotypeAllele> alleles = [];
  union { null, int } referenceReadDepth;
  union { null, int } alternateReadDepth;
  union { null, int } readDepth;
  // NOTE(review): presumably the sample-level filter, as opposed to the
  // variant-level varFilters above -- confirm.
  union { null, string } filter;
  union { null, int } genotypeQuality;

  // In the ADAM world we split multiallelic VCF lines into multiple
  // single-alternate records.  This bit is set if that happened for this
  // record.
  union { null, boolean } splitFromMultiAllelic;

  array<int> genotypeLikelihoods; // Phred-scaled. Always length 3 since we are not multiallelic.

  // Phasing information; all three fields default to null.
  union { null, boolean } isPhased = null;
  union { null, string }  phaseSetId = null;
  union { null, int }     phaseQuality = null;
}
// Associates a variant with an annotation taken from an external variant
// database.  Only a dbSNP identifier is carried at present.
record ADAMDatabaseVariantAnnotation {
  // The annotated variant (nullable, no default declared).
  union { null, ADAMVariant } variant;
  // dbSNP identifier; defaults to null.
  // NOTE(review): presumably the numeric part of an "rs" ID -- confirm.
  union { null, int } dbsnpId = null;
}
}
 |