1 embl_acc European Nucleotide Archive (was EMBL) accession NULL
2 status Status NULL
3 synonym Synonym NULL
4 name Name Alternative/long name
5 type Type of feature NULL
6 toplevel Top Level Top Level Non-Redundant Sequence Region
7 GeneCount Gene Count Total Number of Genes
10 SNPCount Short Variants Total Number of SNPs
11 codon_table Codon Table Alternate codon table
12 _selenocysteine Selenocysteine NULL
13 bacend bacend NULL
15 miRNA Micro RNA Coordinates of the mature miRNA
16 non_ref Non Reference Non Reference Sequence Region
17 sanger_project Sanger Project name NULL
18 clone_name Clone name NULL
19 fish FISH location NULL
21 org Sequencing centre NULL
22 method Method NULL
23 superctg Super contig id NULL
24 inner_start Max start value NULL
25 inner_end Min end value NULL
26 state Current state of clone NULL
27 organisation Organisation sequencing clone NULL
28 seq_len Accession length NULL
29 fp_size FP size NULL
30 BACend_flag BAC end flags NULL
31 fpc_clone_id fpc clone NULL
32 KnwnPCCount protein_coding_KNOWN Number of Known Protein Coding
33 NovPCCount protein_coding_NOVEL Number of Novel Protein Coding
36 PredPCCount protein_coding_PREDICTED Number of Predicted Protein Coding
37 IGGeneCount IG_gene Number of IG Genes
38 IGPsGenCount IG_pseudogene Number of IG Pseudogenes
39 TotPsCount total_pseudogene Total Number of Pseudogenes
42 KnwnPCProgCount protein_coding_in_progress_KNOWN Number of Known Protein Coding in progress
43 NovPCProgCount protein_coding_in_progress_NOVEL Number of Novel Protein Coding in progress
44 AnnotSeqLength Annotated sequence length Annotated Sequence
45 TotCloneNum Total number of clones Total Number of Clones
46 NumAnnotClone Fully annotated clones Number of Fully Annotated Clones
47 ack Acknowledgement Acknowledgement for manual annotation
48 htg_phase High throughput phase High throughput genomic sequencing phase
50 chromosome Chromosome Chromosomal location for supercontigs that are not assembled
51 nonsense Nonsense Mutation Strain specific nonsense mutation
52 author Author Group responsible for Vega annotation
53 author_email Author email address Author email address
54 remark Remark Annotation remark
55 transcr_class Transcript class Transcript class
57 ccds CCDS CCDS identifier
58 CCDS_PublicNote CCDS Public Note Public Note for CCDS identifier, provided by http://www.ncbi.nlm.nih.gov/CCDS
59 Frameshift Frameshift Frameshift modelled as intron
62 ncRNA Structure RNA secondary structure line
63 skip_clone skip clone Skip clone in align_by_clone_identity.pl NULL
64 coding_cnt Coding genes Number of protein coding Genes
67 pseudogene_cnt Pseudogenes Number of pseudogenes
80 supercontig SuperContig name NULL
81 well_name Well plate name NULL
82 bacterial Bacterial NULL
90 bacend_well_nam BACend well name NULL
91 alt_well_name Alt well name NULL
92 TranscriptEdge Transcript Edge NULL
93 alt_embl_acc Alt European Nucleotide Archive (was EMBL) acc NULL
94 alt_org Alt org NULL
95 intl_clone_name International Clone Name NULL
96 embl_version European Nucleotide Archive (was EMBL) Version NULL
97 chr Chromosome Name Chromosome Name Contained in the Assembly
98 equiv_asm Equivalent EnsEMBL assembly For full chromosomes made from NCBI AGPs
109 HitSimilarity hit similarity percentage id to parent transcripts
110 HitCoverage hit coverage coverage of parent transcripts
111 PropNonGap proportion non gap proportion non gap
112 NumStops number of stops NULL
113 GapExons gap exons number of gap exons
114 SourceTran source transcript source transcript
115 EndNotFound end not found end not found
116 StartNotFound start not found start not found
117 Frameshift Fra Frameshift modelled as intron NULL
118 ensembl_name Ensembl name Name of equivalent Ensembl chromosome
119 NoAnnotation NoAnnotation Clones without manual annotation
120 hap_contig Haplotype contig Contig present on a haplotype
121 annotated Clone Annotation Status NULL
122 keyword Clone Keyword NULL
123 hidden_remark Hidden Remark NULL
124 mRNA_start_NF mRNA start not found NULL
125 mRNA_end_NF mRNA end not found NULL
126 cds_start_NF CDS start not found NULL
127 cds_end_NF CDS end not found NULL
128 write_access Write access for Sequence Set 1 for writable , 0 for read-only
129 hidden Hidden Sequence Set NULL
130 vega_name Vega name Vega seq_region.name
131 vega_export_mod Export mode E (External), I (Internal) etc
132 vega_release Vega release Vega release number
133 atag_CLE Clone_left_end Clone_left_end feature marked in GAP database
134 atag_CRE Clone_right_end Clone_right_end feature marked in GAP database
135 atag_Misc Misc miscellaneous feature marked in GAP database
136 atag_Unsure Unsure region of uncertain DNA sequence marked in GAP database
137 MultAssem Multiple Assembled seq region Part of Seq Region is part of more than one assembly
140 wgs WGS contig WGS contig integrated into the map
141 bac AGP clones tiling path of clones
142 GeneGC Gene GC Percentage GC content for this gene
143 TotAssemblyLeng Finished sequence length Length of the assembly not counting sequence gaps
144 amino_acid_sub Amino acid substitution Some translations have been manually curated for amino acid substitutions. For example a stop codon may be changed to an amino acid in order to prevent premature truncation, or one amino acid can be substituted for another.
145 _rna_edit rna_edit RNA edit
146 kill_reason Kill Reason Reason why a transcript has been killed
147 strip_UTR Strip UTR Transcript needs bad UTR removing
148 TotAssLength Finished sequence length Finished Sequence
149 PsCount pseudogene Number of Pseudogenes
152 TotPTCount total_processed_transcript Total Number of Processed Transcripts
153 TotPCCount total_protein_coding Total Number of Protein Coding
156 PolyPsCount polymorphic_pseudogene Number of Polymorphic Pseudogenes
157 TotIGGeneCount total_IG_gene Total Number of IG Genes
158 ProcPsCount proc_pseudogene Number of Processed Pseudogenes
159 UnPsCount unproc_pseudogene Number of Unprocessed Pseudogenes
160 TPsCount transcribed_pseudogene Number of Transcribed Pseudogenes
161 TECCount TEC Number of TEC Genes
170 initial_met Initial methionine Set first amino acid to methionine
171 NonGapHCov NonGapHCov NULL
172 otter_support otter support Evidence ID that was used as supporting feature for building a gene in Vega
173 enst_link enst link Code to link a OTTT with an ENST when they both share the CDS of ENST
174 upstream_ATG upstream ATG Alternative ATG found upstream of the ATG defined as the start for the transcript
175 TPPsCount transcribed_processed_pseudogene Number of Transcribed Processed Pseudogenes
176 TUPsCount transcribed_unprocessed_pseudogene Number of Transcribed Unprocessed Pseudogenes
177 UniPsCount unitary_pseudogene Number of Unitary Pseudogenes
180 TUyPsCount transcribed_unitary_pseudogene Number of Transcribed Unitary Pseudogenes
181 PolyCount polymorphic Number of Polymorphic Genes
184 TRGeneCount TR_gene Number of TR Genes
185 TRPsCount TR_pseudo Number of TR Pseudogenes
186 tp_ott_support otter protein transcript support Evidence ID that was used as supporting feature for building a gene in Vega
187 td_ott_support otter dna transcript support Evidence ID that was used as supporting feature for building a gene in Vega
188 ep_ott_support otter protein exon support Evidence ID that was used as supporting feature for building a gene in Vega
189 ed_ott_support otter dna exon support Evidence ID that was used as supporting feature for building a gene in Vega
191 StopGained SNP causes stop codon to be gained This transcript has a variant that causes a stop codon to be gained in at least 10 percent of a HapMap population
192 StopLost SNP causes stop codon to be lost This transcript has a variant that causes a stop codon to be lost in at least 10 percent of a HapMap population
198 lost_frameshift lost_frameshift Frameshift on the query sequence is lost in the target sequence
199 AltThreePrime Alternate three prime end The position of other possible three prime ends for the transcript
216 GeneInLRG Gene in LRG This gene is contained within an LRG region
217 GeneOverlapLRG Gene overlaps LRG This gene is partially overlapped by an LRG region (start or end outside LRG)
218 readthrough_tra readthrough transcript Havana readthrough transcripts
300 CNE Constitutive exon An exon that is always included in the mature mRNA, even in different mRNA isoforms
301 CE Cassette exon One exon is spliced out of the primary transcript together with its flanking introns
302 IR Intron retention A sequence is spliced out as an intron or remains in the mature mRNA transcript
303 MXE Mutually exclusive exons In the simplest case, one or two consecutive exons are retained but not both
304 A3SS Alternative 3' sites Two or more splice sites are recognized at the 5' end of an exon. An alternative 3' splice junction (acceptor site) is used, changing the 5' boundary of the downstream exon
305 A5SS Alternative 5' sites Two or more splice sites are recognized at the 3' end of an exon. An alternative 5' splice junction (donor site) is used, changing the 3' boundary of the upstream exon
306 AFE Alternative first exon The second exons of each variant have identical boundaries, but the first exons do not overlap
307 ALE Alternative last exon Penultimate exons of each splice variant have identical boundaries, but the last exons do not overlap
308 II Intron isoform Alternative donor or acceptor splice sites lead to truncation or extension of introns, respectively
309 EI Exon isoform Alternative donor or acceptor splice sites lead to truncation or extension of exons, respectively
310 AI Alternative initiation Alternative choice of promoters
311 AT Alternative termination Alternative choice of polyadenylation sites
312 patch_fix Assembly Patch Fix Assembly patch that will, in the next assembly release, replace the corresponding sequence found in the current assembly
313 patch_novel Assembly Patch Novel Assembly patch that will, in the next assembly release, be retained as an alternate non-reference sequence in a similar way to haplotypes
314 LRG Locus Reference Genomic Locus Reference Genomic sequence
315 NoEvidence Evidence for transcript removed Supporting evidence for this projected transcript has been removed
316 circular_seq Circular sequence Circular chromosome or plasmid molecule
317 external_db External database External database to which seq_region name may be linked
318 split_tscript split_tscript split_tscript
319 Threep Three prime end Alternate three prime end
320 gene_cluster Gene cluster Havana annotated gene cluster
328 _rib_frameshift Ribosomal Frameshift Position and magnitude of frameshift
346 PutPCCount protein_coding_PUTATIVE Number of Putative Protein Coding
347 proj_alt_seq Projection altered sequence Projected sequence differs from original
348 hav_gene_type Havana gene biotype Gene biotype assigned by Havana
353 noncoding_cnt Non coding genes Number of non coding genes
358 PHIbase_mutant PHI-base mutant PHI-base phenotype of the mutants
360 ncrna_host ncrna_host Havana ncrna_host gene
361 peptide-class Peptide classification The classification of the gene or transcript based on alignment to NR (values: TE WH NH)
362 working-set Working Gene Set High-confidence set of genes, composed of evidence-based genes and non-overlapping protein-coding ab initio gene models
363 filtered-set Filtered Gene Set v1 Working genes that are screened for TE content and orthology with sorghum and rice
364 super-set Super Working Gene Set Set of all working gene set loci from both Builds 4a and 5a
365 projected4a2 Projected by alignment Temporary (Monday, August 23, 2010)
366 merged Merged species NULL
367 karyotype_rank Rank in the karyotype For a given seq_region, if it is part of the species karyotype, will indicate its rank
368 noncoding_acnt Non coding genes Number of non coding genes on alternate sequences
369 coding_acnt Coding genes Number of protein coding genes on alternate sequences
370 pseudogene_acnt Pseudogenes Number of pseudogenes on alternate sequences
371 clone_end Clone end Side of the contig on which a vector lies (enum:RIGHT, LEFT)
372 contig_scaffold Contig Scaffold Scaffold that contains mutually ordered contigs
373 current_version Current Accession Version Identifies the most recent version of an accession
374 seq_status Sequence Status Sequence status.
375 clone_vector Vector sequence A clone-end vector associated with a contig (enum:SP6, T7).
376 creation_date Creation date Creation date of annotation
377 update_date Update date Last update date of annotation
378 seq_date Sequence date Sequence date
380 havana_cv Havana CV term Controlled vocabulary terms from Havana
381 TlPPsCount translated_processed_pseudogene Number of Translated Processed Pseudogenes
382 NoTransRefError No translations due to reference error This gene is believed to include protein coding transcripts, but no transcript has a translation due to a reference assembly error making specifying the translation impossible.
383 parent_exon_key parent_exon_key The exon key to identify a projected transcript's parent transcript.
386 parent_sid parent_sid The parent stable ID to identify a projected transcript's parent transcript. For internal statistics use only since this method does not work in all cases.
387 noncoding_acnt_s Small non coding genes Number of small non coding genes on alternate sequences
388 noncoding_acnt_m Misc non coding genes Number of unclassified (miscellaneous) non coding genes on alternate sequences
389 noncoding_cnt_s Small non coding genes Number of small non coding genes
390 noncoding_cnt_l Long non coding genes Number of long non coding genes
391 TlUPsCount translated_unprocessed_pseudogene Number of Translated Unprocessed Pseudogenes
393 AFFYMETRIXCount AFFYMETRIX Count Total Number of AFFYMETRIX features
394 RFLPCount RFLP Count Total Number of RFLP features
395 xref_id Xref ID ID of associated database reference
396 vega_chr_type Vega chrom type Type of chromosome - haplotype, other, etc
398 genscan Genscan gene predictions Number of prediction genes generated by Genscan
399 gsc GSC gene prediction Number of prediction genes generated by gsc
400 snap Snap gene prediction Number of prediction genes generated by Snap
401 fgenesh FGENESH gene prediction Number of prediction genes generated by FGENESH
402 genefinder Genefinder gene prediction Number of prediction genes generated by Genefinder
403 transcript_cnt Gene transcripts Number of transcripts
404 transcript_acnt Gene transcripts Number of transcripts on the alternate sequences
405 ref_length Golden Path Length Length of the primary assembly
406 total_length Base Pairs Total length of the assembly
408 coding_rcnt Readthrough coding genes Number of readthrough coding genes
409 coding_racnt Readthrough coding genes Number of readthrough coding genes on alternate sequences
410 noncoding_racnt_l Readthrough long non coding genes Number of readthrough long non coding genes on alternate sequences
411 noncoding_racnt_s Readthrough small non coding genes Number of readthrough small non coding genes on alternate sequences
412 noncoding_rcnt_s Readthrough small non coding genes Number of readthrough small non coding genes
413 noncoding_rcnt_l Readthrough long non coding genes Number of readthrough long non coding genes
414 pseudogene_rcnt Readthrough pseudogenes Number of readthrough pseudogenes
415 pseudogene_racnt Readthrough pseudogenes Number of readthrough pseudogenes on alternate sequences
416 gencode_level GENCODE annotation level level 1 (verified loci), level 2 (manually annotated loci), level 3 (automatically annotated loci)
417 gencode_basic GENCODE basic annotation GENCODE Basic is a view provided by UCSC for users. It includes a subset of the GENCODE transcripts. In general, for protein coding genes it will show only the full length models (unless a protein coding gene has no full-length models, in which case other rules apply). For noncoding genes, it will also only show the full-length (mRNA start and end found) models (unless there are no full-length models, in which case other rules apply).
418 struct_var Structural variants Total Number of structural variants
419 genblast GenBlastG gene predictions Number of prediction genes generated by GenBlastG
420 syn_gene_pairs Syntenic gene pairs Syntenic gene relationship from Gramene pipeline
421 vectorbase_maker_pre VectorBase gene predictions Number of prediction genes generated with MAKER, by VectorBase.
422 trnascan tRNAscan-SE predictions Number of predicted tRNA genes generated by tRNAscan-SE
423 tgac_pred_supp7 T. turgidum RNA-seq alignments Number of T. turgidum RNA-seq alignments from Krasileva et al.
424 tgac_pred_supp17 T. aestivum RNA-seq alignments Number of T. aestivum RNA-seq alignments from Krasileva et al.
425 genome_component Genome Component Name For polyploid genome, the genome component name the seq_region belongs to
426 transcript_whl RNA-seq transcripts RNA-seq transcripts from EchinoBase
427 appris APPRIS APPRIS is a system that deploys a range of computational methods to provide value to the annotations of the human genome. APPRIS also selects one of the CDS for each gene as the principal isoform. APPRIS defines the principal variant by combining protein structural and functional information and information from the conservation of related species. principal1 - APPRIS principal isoform. principal2 - APPRIS candidate isoform (CCDS). principal3 - APPRIS candidate isoform (earliest CCDS). principal4 - APPRIS candidate isoform (longest CCDS). principal5 - APPRIS candidate isoform (longest coding sequence). alternative1 - APPRIS candidate isoform that is conserved in at least three tested species. alternative2 - APPRIS candidate isoform that appears to be conserved in fewer than three tested species
428 TSL Transcript Support Level Transcription Support Level (TSL) is a method to highlight the well-supported and poorly-supported transcript models for users. The method relies on the primary data that can support full-length transcript structure and data are provided by UCSC. The following categories are assigned to each of the evaluated annotations. tsl1 - all splice junctions of the transcript are supported by at least one non-suspect mRNA. tsl2 - the best supporting mRNA is flagged as suspect or the support is from multiple ESTs. tsl3 - the only support is from a single EST. tsl4 - the best supporting EST is flagged as suspect. tsl5 - no single transcript supports the model structure. tslNA - the transcript was not analyzed for one of the following reasons: pseudogene annotation, including transcribed pseudogenes. Human leukocyte antigen (HLA) transcript. Immunoglobulin gene transcript. T-cell receptor transcript. Single-exon transcript (will be included in a future version)
429 protein_coverage Protein Coverage Protein coverage for this gene derived from geneTree in compara
430 consensus_coverage Consensus Coverage Consensus coverage for this gene derived from geneTree in compara
437 lncRNACount lncRNA_Count Number of lncRNAs
438 ncRNACount ncRNA_Count Number of ncRNAs
439 UnclassPTCount UnclassPT_Count Number of Unclassified Processed Transcripts
444 noncoding_cnt_m Misc non coding genes Number of unclassified (miscellaneous) non coding genes
445 noncoding_rcnt_m Readthrough misc non coding genes Number of readthrough unclassified (miscellaneous) non coding genes
446 noncoding_racnt_m Readthrough misc non coding genes Number of readthrough unclassified (miscellaneous) non coding genes on alternate sequences
447 noncoding_acnt_l Long non coding genes Number of long non coding genes on alternate sequences
448 noncoding_racnt Readthrough non coding genes Number of readthrough non coding genes on alternate sequences
449 noncoding_rcnt Readthrough non coding genes Number of readthrough non coding genes
469 SO_accession SO accession Sequence Ontology accession
470 SO_term SO term Sequence Ontology term
471 display_term display term Ensembl display term
472 NCBI_term NCBI term NCBI term
473 feature_SO_term feature SO term Sequence Ontology term for the associated feature
474 rank rank Relative severity of this variation consequence
475 polyphen_prediction polyphen prediction PolyPhen-2 prediction
476 sift_prediction sift prediction SIFT prediction
477 short_name Short name A shorter name for an instance, e.g. a VariationSet
478 dbsnp_clin_sig dbSNP/ClinVar clinical significance The clinical significance of a variant as reported by ClinVar and dbSNP
479 dgva_clin_sig DGVa clinical significance The clinical significance of a structural variant as reported by DGVa
480 clinvar_clin_sig ClinVar clinical significance The clinical significance of a variant as reported by ClinVar
481 prot_func_analysis Protein function analysis The program used to make protein function predictions
482 associated_gene Associated gene ID of gene(s) linked by phenotype association
483 risk_allele Risk allele Risk allele in phenotype association
484 p_value P-value P-value denoting significance of an observed phenotype annotation
485 variation_names Variation names Variant ID(s) linked with a phenotype association
486 sample_id Sample ID Sample ID for source of phenotype association
487 strain_id Strain ID Strain ID for source of phenotype association
488 lod_score LOD score Log Of Odds score
489 variance Variance Variance statistic
490 inheritance_type Inheritance type Inheritance type of a trait
491 external_id External ID External identifier for an entity
492 odds_ratio Odds ratio Odds ratio used to denote significance of an observed phenotype annotation
493 beta_coef Beta coefficient Beta coefficient (or standardized coefficient) used to denote significance of an observed phenotype annotation
494 allele_symbol Allele symbol Allele symbol linked with phenotype association
495 allele_accession_id Allele accession ID Allele accession ID linked with phenotype association
496 marker_accession_id Marker accession ID Marker ID linked with phenotype association
497 evidence Variant evidence status Evidence status for a variant
498 review_status ClinVar review_status ClinVar review_status for assertion
499 based_on Evidence type used for protein impact prediction Evidence type used for a PolyPhen protein impact prediction
500 conservation_score Sift conservation score Median conservation value in an alignment used to make a Sift prediction
501 sequence_number Number of sequences in alignment Number of protein sequences in the alignment used to make a protein impact prediction
502 otter_truncated Otter truncated feature This feature extends beyond the slice, but has been trimmed. (For use in otter client-server communications.)
503 trans_spliced Trans-spliced transcript A single RNA transcript derived from multiple precursor mRNAs.
505 genebuild_msu7_tes TE-related Gene (MSU) Number of TE-related genes predicted by MSU through a process of automatic and manual curation
506 ibsc_low_confidence PGSB low-confidence Number of low-confidence genes annotated by the International Barley Sequencing Consortium
507 pubmed_id PubMed ID PubMed identifier
508 phenotype_type Phenotype type Type of the phenotype information