Answer:
# Build the set of genomic intervals that are neither exons nor promoters.
# 1. Keep only the promoter states from the chromHMM segmentation.
grep Promoter hesc.chromHmm.bed > hesc.promoters.bed
# 2. Combine exons and promoters into one file sorted by chromosome, then
#    start. The sorted result must be written with '>' (not piped with '|',
#    which would try to execute the file name as a command).
sort -k1,1 -k2,2n exons.bed hesc.promoters.bed > exons.and.promoters.bed
# 3. Complement against the chromosome sizes in genome.txt: everything not
#    covered by an exon or a promoter.
bedtools complement -i exons.and.promoters.bed -g genome.txt > notexonsorpromoters.bed
Answer:
# For every GWAS SNP (-a) report the nearest exon (-b). -d appends the
# distance in bp as the final column (0 means the SNP overlaps the exon).
# head limits the preview to the first 10 rows.
bedtools closest -a gwas.bed -b exons.bed -d | head
chr1 1005805 1005806 rs3934834 chr1 1007125 1007955 NM_001205252_exon_0_0_chr1_1007126_r 0 - 1320
chr1 1079197 1079198 rs11260603 chr1 1078118 1079434 NR_038869_exon_2_0_chr1_1078119_f 0 + 0
chr1 1247493 1247494 rs12103 chr1 1247397 1247527 NM_001256456_exon_1_0_chr1_1247398_r 0 - 0
chr1 1247493 1247494 rs12103 chr1 1247397 1247527 NM_001256460_exon_1_0_chr1_1247398_r 0 - 0
chr1 1247493 1247494 rs12103 chr1 1247397 1247527 NM_001256462_exon_1_0_chr1_1247398_r 0 - 0
chr1 1247493 1247494 rs12103 chr1 1247397 1247527 NM_001256463_exon_1_0_chr1_1247398_r 0 - 0
chr1 1247493 1247494 rs12103 chr1 1247397 1247527 NM_017871_exon_1_0_chr1_1247398_r 0 - 0
chr1 2069171 2069172 rs425277 chr1 2066700 2066786 NM_001033581_exon_1_0_chr1_2066701_f 0 + 2386
chr1 2069171 2069172 rs425277 chr1 2066700 2066786 NM_001033582_exon_1_0_chr1_2066701_f 0 + 2386
chr1 2069171 2069172 rs425277 chr1 2066700 2066786 NM_001242874_exon_1_0_chr1_2066701_f 0 + 2386
# Mean SNP-to-nearest-exon distance. Column 11 is the -d distance
# (4 gwas.bed columns + 6 exons.bed columns + distance).
# NOTE(review): the mean is taken over output rows (NR), and a SNP that ties
# with several exons yields several rows, so tied SNPs are weighted more than
# once -- confirm this is the intended definition of "average".
bedtools closest -a gwas.bed -b exons.bed -d \
| awk '{ sum += $11 } END { if (NR > 0) print sum / NR }'
46713.1
makewindows
tool.)
Answer:
# Tile the genome into 500 kb windows (sizes come from genome.txt) ...
bedtools makewindows -g genome.txt -w 500000 > genome.windows.bed
# ... then, with -c, append to each window the number of exons overlapping it.
bedtools intersect -a genome.windows.bed -b exons.bed -c > genome.windows.exoncount.bedg
or…
# Same computation as a single pipeline: "-a -" makes intersect read the
# windows from stdin, so no intermediate genome.windows.bed is written.
bedtools makewindows -g genome.txt -w 500000 \
| bedtools intersect -a - -b exons.bed -c \
> genome.windows.exoncount.bedg
Answer:
# Exons lying entirely inside an enhancer. -f 1.0 requires the overlap to
# cover 100% of the -a record (the exon); -wa -wb print the exon followed by
# the enhancer it falls in. The enhancer set is produced on the fly with bash
# process substitution, so no temporary file is needed.
bedtools intersect -a exons.bed \
-b <(grep Enhancer hesc.chromHmm.bed) \
-wa -wb -f 1.0 \
| head
chr1 948846 948956 NM_005101_exon_0_0_chr1_948847_f 0 + chr1 948337 949337 4_Strong_Enhancer
chr1 1051439 1051736 NM_017891_exon_9_0_chr1_1051440_r 0 - chr1 1051337 1051737 6_Weak_Enhancer
chr1 1109285 1109306 NM_001130045_exon_0_0_chr1_1109286_f 0 + chr1 1108537 1109537 6_Weak_Enhancer
chr1 1109803 1109869 NM_001130045_exon_2_0_chr1_1109804_f 0 + chr1 1109737 1109937 6_Weak_Enhancer
chr1 1219357 1219470 NM_001130413_exon_4_0_chr1_1219358_f 0 + chr1 1219137 1220137 7_Weak_Enhancer
chr1 1219357 1219470 NR_037668_exon_4_0_chr1_1219358_f 0 + chr1 1219137 1220137 7_Weak_Enhancer
chr1 1229202 1229313 NM_030649_exon_1_0_chr1_1229203_r 0 - chr1 1228937 1229937 6_Weak_Enhancer
chr1 1229469 1229579 NM_030649_exon_2_0_chr1_1229470_r 0 - chr1 1228937 1229937 6_Weak_Enhancer
chr1 1234724 1234736 NM_030649_exon_14_0_chr1_1234725_r 0 - chr1 1234137 1234937 7_Weak_Enhancer
chr1 1245060 1245231 NM_153339_exon_4_0_chr1_1245061_f 0 + chr1 1244937 1245337 4_Strong_Enhancer
# Count the exon records that are fully contained in an enhancer. -u writes
# each -a record at most once no matter how many -b records satisfy the
# overlap, so wc -l counts exon records rather than exon/enhancer pairs.
bedtools intersect -a exons.bed \
-b <(grep Enhancer hesc.chromHmm.bed) \
-wa -wb -f 1.0 -u \
| wc -l
13746
Answer (Any idea why we need -u?):
# Total number of GWAS SNP records; used as the denominator of the fraction.
wc -l gwas.bed
17680 gwas.bed
# Number of SNPs overlapping at least one exon. -u reports each SNP once even
# when it overlaps several exon records (exons.bed lists the same coordinates
# under multiple transcript names), so SNPs are not double counted.
bedtools intersect -a gwas.bed -b exons.bed -u | wc -l
1625
# Fraction of GWAS SNPs that overlap an exon (1625 of 17680). The division
# runs in a BEGIN block, so awk needs no input stream.
awk 'BEGIN { print 1625 / 17680 }'
0.0919118
Answer (Any idea why we need -u?):
# Number of GWAS SNPs overlapping an enhancer or a promoter. The regulatory
# set is selected with an extended regex; grep -E replaces the obsolescent
# egrep, which newer GNU grep releases warn about. -u reports each SNP once
# even if it overlaps several chromHMM records.
bedtools intersect -a gwas.bed -b <(grep -E "Enhancer|Promoter" hesc.chromHmm.bed) -u \
| wc -l
1285
# Fraction of GWAS SNPs that overlap an enhancer or promoter (1285 of 17680).
# The division runs in a BEGIN block, so awk needs no input stream.
awk 'BEGIN { print 1285 / 17680 }'
0.072681
Answer:
# Emit the 2 bp immediately to the left (-l 2) and right (-r 2) of every exon.
# genome.txt supplies the chromosome sizes used to keep flanks in bounds.
bedtools flank -l 2 -r 2 -i exons.bed -g genome.txt > splice-sites.bed
Or:
# Alternative: grow every exon by 2 bp on both sides (-b 2) ...
bedtools slop -b 2 -i exons.bed -g genome.txt > exons.plus2.bed
# ... then remove the exonic bases, leaving only the added 2 bp margins.
# NOTE(review): subtract removes bases covered by ANY exon in -b, so where
# exons overlap or abut the result may differ from the flank version.
bedtools subtract -a exons.plus2.bed -b exons.bed > splice-sites.bed
grep
).
Answer:
# Jaccard similarity between CpG islands and enhancers: bp of intersection
# divided by (bp of union - bp of intersection), as in the output header.
# Assumes both inputs are sorted by chromosome then start -- TODO confirm.
bedtools jaccard -a cpg.bed -b <(grep Enhancer hesc.chromHmm.bed)
intersection union-intersection jaccard n_intersections
1148180 132977386 0.0086344 4969
# Same statistic for CpG islands versus promoters, for comparison with the
# enhancer result.
bedtools jaccard -a cpg.bed -b <(grep Promoter hesc.chromHmm.bed)
intersection union-intersection jaccard n_intersections
15661111 53551816 0.292448 20402
Answer:
# Null expectation: move the promoter intervals to random positions within the
# chromosomes listed in genome.txt, re-sort by chromosome and start, and
# recompute the Jaccard statistic against CpG islands.
# NOTE(review): no -seed is given, so the shuffled set (and the numbers
# printed) will presumably differ from run to run.
bedtools shuffle -i <(grep Promoter hesc.chromHmm.bed) -g genome.txt \
| sort -k1,1 -k2,2n \
> promoters.shuffled.bed
bedtools jaccard -a cpg.bed -b promoters.shuffled.bed
intersection union-intersection jaccard n_intersections
294071 68556207 0.00428949 78
awk
or perl
here, as well as the groupby tool.)