Skip to content

Commit

Permalink
Updated CRAVAT and IGV reports
Browse files Browse the repository at this point in the history
  • Loading branch information
vrushali-broad committed May 10, 2019
1 parent 7ce40c8 commit 86e93b9
Show file tree
Hide file tree
Showing 20 changed files with 1,448 additions and 33 deletions.
3 changes: 0 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +0,0 @@
[submodule "igv.js-reports"]
path = igv.js-reports
url = https://github.com/igvteam/igv.js-reports.git
84 changes: 61 additions & 23 deletions ctat_mutations
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ logging.basicConfig(stream=sys.stderr, format=FORMAT, level=logging.INFO)
# Constants

SCRIPTDIR = os.path.sep.join([os.path.dirname(os.path.realpath(__file__)), "src"])
VIZDIR = os.path.sep.join([os.path.dirname(os.path.realpath(__file__)), "igv.js-reports"])
VIZDIR = os.path.sep.join([os.path.dirname(os.path.realpath(__file__)), "igv_reports"])

# Keys for alignment return (dicts)
INDEX_CMD = "cmd"
Expand Down Expand Up @@ -62,6 +62,11 @@ STR_VARIANT_SAMTOOLS = "SAM"
STR_VARIANT_NONE = "NONE"
LSTR_VARIANT_CALLING_CHOICES = [STR_VARIANT_GATK,
STR_VARIANT_NONE]
# variant calling methods
STR_GATK_HC = "HaplotypeCaller"
STR_GATK_M2 = "Mutect2"
LSTR_VARIANT_METHOD_CHOICES = [STR_GATK_HC,
STR_GATK_M2]

# Choices for variant filtering
STR_FILTERING_BCFTOOLS = "BCFTOOLS"
Expand Down Expand Up @@ -441,15 +446,26 @@ class RnaseqSnp:
lcmd_gatk_rna_calling.append(str_depth,'dep.ok')

# Variant calling
str_hap_call = " ".join(["java", "-jar", gatk_path,
"HaplotypeCaller", "-R",
genome_fa, "-I", str_input_bam,
"--recover-dangling-heads","true",
"--dont-use-soft-clipped-bases","-stand-call-conf",
"20.0",
"-O", str_variants_file])
lcmd_gatk_rna_calling.append(Command(str_hap_call,'hap_call.ok'))

if args_call.str_variant_call_method == STR_GATK_HC:
str_hap_call = " ".join(["java", "-jar", gatk_path,
"HaplotypeCaller", "-R",
genome_fa, "-I", str_input_bam,
"--recover-dangling-heads","true",
"--dont-use-soft-clipped-bases","-stand-call-conf",
"20.0",
"-O", str_variants_file])
lcmd_gatk_rna_calling.append(Command(str_hap_call,'hap_call.ok'))

else:
str_m2_call = " ".join(["java", "-jar", gatk_path,
"Mutect2", "-R",
genome_fa, "-I", str_input_bam,
"--dont-use-soft-clipped-bases",
"-stand-call-conf","20.0",
"-A", "ReferenceBases", "-A", "QualByDepth",
"-A", "FisherStrand", "-A", "ChromosomeCounts",
"-O", str_variants_file])
lcmd_gatk_rna_calling.append(Command(str_m2_call,'Mutect2_call.ok'))
return {INDEX_CMD:lcmd_gatk_rna_calling,
INDEX_FILE:str_variants_file}

Expand Down Expand Up @@ -780,15 +796,22 @@ class RnaseqSnp:
C_STR_INIT_FILTER)
str_filtered_variants_index_file = str_filtered_variants_file + ".csi"
# Filter variants
str_filter_command = " ".join(["java -jar",gatk_path,
"VariantFiltration -R",
genome_fa, "-V",
str_variants_file, "-window 35",
"-cluster 3 --filter-name FS",
"-filter \"FS > 30.0\"",
"--filter-name QD","-filter \"QD < 2.0\"",
"-O", str_filtered_variants_file])
cmd_variant_filtration = Command(str_filter_command,'variant_filter.ok')
if args_call.str_variant_call_method == STR_GATK_HC:
str_filter_command = " ".join(["java -jar",gatk_path,
"VariantFiltration -R",
genome_fa, "-V",
str_variants_file, "-window 35",
"-cluster 3 --filter-name FS",
"-filter \"FS > 30.0\"",
"--filter-name QD","-filter \"QD < 2.0\"",
"-O", str_filtered_variants_file])
cmd_variant_filtration = Command(str_filter_command,'variant_filter.ok')
else:
str_filter_command = " ".join(["java -jar",gatk_path,
"FilterMutectCalls",
"-V", str_variants_file,
"-O", str_filtered_variants_file])
cmd_variant_filtration = Command(str_filter_command,'variant_filter.ok')

return{INDEX_CMD: [cmd_variant_filtration],
INDEX_FILE: str_filtered_variants_file}
Expand Down Expand Up @@ -1092,6 +1115,12 @@ class RnaseqSnp:
warnings.warn(str_error)
exit(7)

# if( args_parsed.str_variant_call_method == STR_GATK_M2
# and args_parsed.str_sample_name == None):
# str_error = "".join(["Need to supply a sample name in order to run Mutect2."])
# exit(str_error)


# set full paths in case of relative path settings
if args_parsed.str_bam_file:
args_parsed.str_bam_file = os.path.abspath(args_parsed.str_bam_file)
Expand Down Expand Up @@ -1349,13 +1378,13 @@ class RnaseqSnp:
lcmd_commands.append(Command(str_cmd_copy_bed,'copy_bed.ok'))

html_out=os.path.join(args_parsed.str_out_dir,"igvjs_viewer.html")
str_cmd_html = " ".join([ "python",os.path.sep.join([VIZDIR,"create_variant_report.py"]),
str_cmd_html = " ".join([ "python",os.path.sep.join([VIZDIR,"report.py"]),
os.path.join(args_parsed.str_out_dir,"cancer.vcf"),
genome_fa,
#"--ideogram", examples/variants/cytoBandIdeo.txt,
"--flanking", "1000",
"--infoColumns", "GENE,TISSUE,TUMOR,COSMIC_ID,GENE,SOMATIC",
"--tracks", str_bam_called_from+","+ref_bed+".gz",
"--info-columns", "GENE TISSUE TUMOR COSMIC_ID GENE SOMATIC",
"--tracks", str_bam_called_from+" "+ref_bed+".gz",
"--output", html_out])
lcmd_commands.append(Command(str_cmd_html, 'html_viz.ok'))

Expand Down Expand Up @@ -1563,7 +1592,14 @@ def make_menu():
gatk_args_group.add_argument("--sequencing_platform", metavar = "Sequencing Platform",
dest = "str_sequencing_platform", default = "ILLUMINA", choices = LSTR_SEQ_CHOICES,
help = "The sequencing platform used to generate the samples choices include " + " ".join(LSTR_SEQ_CHOICES) + ".")


gatk_args_group.add_argument("--variant_calling_method", metavar = "Sequencing Platform",
dest = "str_sequencing_platform", default = "ILLUMINA", choices = LSTR_SEQ_CHOICES,
help = "The sequencing platform used to generate the samples choices include " + " ".join(LSTR_SEQ_CHOICES) + ".")

gatk_args_group.add_argument("--variant_call_method", metavar = "Variant Call Method", dest = "str_variant_call_method",
default = STR_GATK_HC, choices = LSTR_VARIANT_METHOD_CHOICES,
help = "Specifies the variant calling method to use.")
# Resources
optional.add_argument("--genome_lib_dir",dest="genome_lib_dir", type=str,
default=os.environ.get('CTAT_GENOME_LIB'),
Expand Down Expand Up @@ -1606,7 +1642,9 @@ if __name__ == "__main__":

# Need to rn, call the script
lcmd_cmds = RnaseqSnp().build_pipeline_cmds(args_parsed)
print(lcmd_cmds)
for cmd in lcmd_cmds:
print(cmd)
pipeliner.add_commands([cmd])
pipeliner.run()

Expand Down
1 change: 0 additions & 1 deletion igv.js-reports
Submodule igv.js-reports deleted from 35d449
Binary file added igv_reports/.DS_Store
Binary file not shown.
7 changes: 7 additions & 0 deletions igv_reports/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-

"""Top-level package for igv-reports."""

__author__ = """Jim Robinson"""
__email__ = '[email protected]'
__version__ = '0.1.0'
27 changes: 27 additions & 0 deletions igv_reports/bam.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import pysam

def get_data(bam_file, region=None):
args = ["-b", "-h", bam_file]
if region:
range_string = region['chr'] + ":" + str(region['start']) + "-" + str(region['end'])
args.append(range_string)
return pysam.view(*args)


class BamReader:

def __init__(self, filename):

self.filename = filename


def slice(self, region = None) :
args = ["-b", "-h", self.filename]
if region:
range_string = region['chr'] + ":" + str(region['start']) + "-" + str(region['end'])
args.append(range_string)

args2 = [self.filename, range_string]
samview = pysam.view(*args2)

return pysam.view(*args)
36 changes: 36 additions & 0 deletions igv_reports/bedtable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import json

from .feature import parse


class BedTable:

# Always remember the *self* argument
def __init__(self, bed_file):

self.features = []

featureList = parse(bed_file)
unique_id = 1
for var in featureList:
self.features.append((var, unique_id))
unique_id += 1

def to_JSON(self):

jsonArray = [];

for tuple in self.features:
feature = tuple[0]
unique_id = tuple[1]
obj = {
"unique_id": unique_id,
"Chrom": feature.chr,
"Start": feature.start + 1,
"End": feature.end,
"Name": feature.name
}

jsonArray.append(obj)

return json.dumps(jsonArray)
56 changes: 56 additions & 0 deletions igv_reports/datauri.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@

from base64 import b64encode
from gzip import compress
import argparse
from . import utils
from .regions import parse_region


#This module exports functions to convert text or binary data to a data URI readable by igv.js.

def get_data_uri(data):

"""
Return a data uri for the input, which can be either a string or byte array
"""

if isinstance(data, str):
data = compress(data.encode())
mediatype = "data:application/gzip"
else:
if data[0] == 0x1f and data[1] == 0x8b:
mediatype = "data:application/gzip"
else:
mediatype = "data:application:octet-stream"

enc_str = b64encode(data)

data_uri = mediatype + ";base64," + str(enc_str)[2:-1]
return data_uri


def file_to_data_uri(filename, filetype=None, genomic_range=None):
reader = utils.getreader(filename, filetype)
region = parse_region(genomic_range) if genomic_range else None
data = reader.slice(region)
data_uri = get_data_uri(data)
return data_uri


def main():
parser = argparse.ArgumentParser()
parser.add_argument("filename", help="name of file to be converted to data uri")
parser.add_argument("-t", "--filetype", help="type of file to be converted to data uri")
parser.add_argument("-r", "--region" , help="genomic region to be converted in the form chr:start-stop")
args = parser.parse_args()

if args.region:
region = parse_region(args.region)
else:
region = None

uri = file_to_data_uri(args.filename, args.filetype, region)
print(uri)

if __name__ == "__main__":
main()
28 changes: 28 additions & 0 deletions igv_reports/fasta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from . import regions
import pysam

def get_data(fasta_file,region=None):

if None == region:

with open(fasta_file,"r") as f:

return(f.read())

else :

if isinstance(region,str):
region = regions.parse_region(region)

chr = region["chr"]
start = region["start"] - 1
end = region["end"]

fasta = pysam.FastaFile(fasta_file)

slice_seq = fasta.fetch(chr, start, end)

fasta.close()

return slice_seq

Loading

0 comments on commit 86e93b9

Please sign in to comment.