-
Notifications
You must be signed in to change notification settings - Fork 81
/
SNP_compare
85 lines (76 loc) · 3.82 KB
/
SNP_compare
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#### comparison of distributions of SNPs in case and control samples in 8q24 region
#### within 200 prostate cancer patients and 200 normal control
source("http://bioconductor.org/biocLite.R")
biocLite("VariantAnnotation")
library(VariantAnnotation)
setwd("your working directory")
all_files=list.files()
snp_case_8q24=data.frame()
snp_case_8q24_count=data.frame()
for (index in seq(3,369,2)){
print (index)
vcf=readVcf(all_files[index],'8q24_ref.fa')
filename=all_files[index]
filename=unlist(strsplit(filename, "_"))[1]
location=c();ref=c();alt=c();
string=names(rowData(vcf))
qual_value=rowData(vcf)$QUAL
length(string)->len_records
for (i in 1:len_records) {
ith_string=string[i]
location=append(location,unique(na.omit(as.numeric(unlist(strsplit(ith_string, "[^0-9]+"))))))
ref=append(ref,substr(ith_string,nchar(ith_string)-2,nchar(ith_string)-2))
alt=append(alt,substr(ith_string,nchar(ith_string),nchar(ith_string)))}
print (location); print (ref); print (alt)
if (length(location)!=0){
snp_case_8q24=rbind(snp_case_8q24,data.frame(filename,location,ref,alt,qual_value))
snp_case_8q24_count=rbind(snp_case_8q24_count,data.frame(filename,length(location)))}
}
snp_control_8q24=data.frame()
snp_control_8q24_count=data.frame()
for (index in seq(371,745,2)){
print (index)
vcf=readVcf(all_files[index],'8q24_ref.fa')
filename=all_files[index]
print (filename)
filename=unlist(strsplit(filename, "_"))[1]
location=c();ref=c();alt=c();
string=names(rowData(vcf))
qual_value=rowData(vcf)$QUAL
length(string)->len_records
for (i in 1:len_records) {
ith_string=string[i]
location=append(location,unique(na.omit(as.numeric(unlist(strsplit(ith_string, "[^0-9]+"))))))
ref=append(ref,substr(ith_string,nchar(ith_string)-2,nchar(ith_string)-2))
alt=append(alt,substr(ith_string,nchar(ith_string),nchar(ith_string)))}
print (location); print (ref); print (alt)
if (length(location)!=0){
snp_control_8q24=rbind(snp_control_8q24,data.frame(filename,location,ref,alt,qual_value))
snp_control_8q24_count=rbind(snp_control_8q24_count,data.frame(filename,length(location)))}
}
########################################################################################################
count.table=data.frame()
for (i in 1:8500){
if (i %in% snp_case_8q24[,2] | i %in% snp_control_8q24[,2]){
count.table=rbind(count.table,c(i,table(snp_case_8q24[,2])[as.character(i)],table(snp_control_8q24[,2])[as.character(i)]))
}
}
colnames(count.table)=c('SNP_loci','case','control')
count.table[is.na(count.table)]=0
library(MASS)
chisq.test(count.table$case,count.table$control)
plot(count.table$case,count.table$control,pch='*',col=c('black','red')[((count.table$case/count.table$control>2 | count.table$case/count.table$control<1/2) & count.table$case+count.table$control>20) +1],xlab='case',ylab='control')
abline(0,2,col=4);abline(0,1/2,col=4)
diff_index=((count.table$case/count.table$control>2 | count.table$case/count.table$control<1/2) & count.table$case+count.table$control>20)
count.table[diff_index,]
library(plotrix)
addtable2plot(120,-1,as.data.frame(count.table[diff_index,]),bty="o",hlines=TRUE,vlines=TRUE)
#####################################chi-square test for each position and plotting with significance in log space
chiset=c()
for (i in 1:length(count.table$case))
{chiset=c(chiset,(chisq.test(matrix(c(184,188,count.table$case[i],count.table$control[i]),2))$p.value))}
plot(count.table$case,count.table$control,pch=19,cex=chiset[,7],xlab='SNP occurance in prostate cancer patients',ylab='SNP occurance in normal control',
col=c('steelblue2',rgb(1,0,0,0.5) )[((count.table$case>count.table$control) & count.table$case+count.table$control>20) +1])
legend(x=105,y=15,col=c(rgb(1,0,0,1/4),'steelblue2'),
legend=c('Cancer enriched','Normal enriched'),
pch=c(19,19),bty = "n")