Skip to content

Commit

Permalink
add improper_check_maximum_unique_pairs option
Browse files Browse the repository at this point in the history
  • Loading branch information
friend1ws committed Dec 7, 2018
1 parent eb97fe5 commit bf7bcef
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 5 deletions.
5 changes: 4 additions & 1 deletion genomon_sv/arg_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def create_parser():
# top level parser
parser = argparse.ArgumentParser(prog = "GenomonSV", formatter_class=argparse.ArgumentDefaultsHelpFormatter)

parser.add_argument("--version", action = "version", version = "GenomonSV-0.6.0")
parser.add_argument("--version", action = "version", version = "GenomonSV-0.6.1b1")

subparsers = parser.add_subparsers()

Expand Down Expand Up @@ -82,6 +82,9 @@ def create_parser():
cluster_improper_group.add_argument("--improper_check_margin_size", type = int, default = 1500,
help = "This should be sufficiently greater than insert size, but the computational time will increase when too large (default: %(default)s)")

cluster_improper_group.add_argument("--improper_check_maximum_unique_pairs", type = int, default = 10000,
help = "The number of unique improper pairs at each local region. \
Set mainly for avoiding local regions with extremely high depths and huge amount of junction reads (default: %(default)s)")

parse_parser.set_defaults(func = genomonSV_parse)
####################
Expand Down
13 changes: 11 additions & 2 deletions genomon_sv/parseFunction.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,7 @@ def clusterJunction(inputFilePath, outputFilePath, check_margin_size, maximum_un

if len(mergedJunction) > maximum_unique_pairs:
print >> sys.stderr, "Exceeded maximum number of unique junction pairs at %s:%s-%s" % (F[0], F[1], F[2])
print >> sys.stderr, "Skipp %s:%s-%s" % (F[0], F[1], str(int(F[2]) + check_margin_size))
print >> sys.stderr, "Skip %s:%s-%s" % (F[0], F[1], str(int(F[2]) + check_margin_size))
mergedJunction = {}
mergedBedpeInfo = {}
skip_pos = int(F[1]) + check_margin_size
Expand Down Expand Up @@ -737,17 +737,21 @@ def makeImproperBedpe(inputFilePath, outputFilePath, junction_dist_margin, clipp



def clusterImproperBedpe(inputFilePath, outputFilePath, check_margin_size):
def clusterImproperBedpe(inputFilePath, outputFilePath, check_margin_size, maximum_unique_pairs):

####################
# cluster and summarize improper read pair bed file
hIN = open(inputFilePath, "r")
hOUT = open(outputFilePath, "w")

mergedBedpe = {}
temp_chr = None
skip_pos = 0
for line in hIN:

F = line.rstrip('\n').split('\t')
if F[0] != temp_chr: temp_chr, skip_pos = F[0], 0
if int(F[1]) < skip_pos: continue

match = 0
delList = []
Expand Down Expand Up @@ -796,6 +800,11 @@ def clusterImproperBedpe(inputFilePath, outputFilePath, check_margin_size):
newKey = '\t'.join([F[0], F[1], F[2], F[3], F[4], F[5], F[8], F[9]])
mergedBedpe[newKey] = F[6] + '\t' + F[7] + '\t' + F[10]

if len(mergedBedpe) > maximum_unique_pairs:
    # Too many unique improper pairs have accumulated in this local region.
    # Report it, discard the accumulated clusters, and set skip_pos so that
    # the check at the top of the loop ignores records starting before it.
    print >> sys.stderr, "Exceeded maximum number of unique improper pairs at %s:%s-%s" % (F[0], F[1], F[2])
    print >> sys.stderr, "Skip %s:%s-%s" % (F[0], F[1], str(int(F[2]) + check_margin_size))
    # The original line here was the bare expression `mergedBedpe`, a no-op,
    # so the dictionary was never emptied and kept exceeding the limit.
    # Reset it, as the equivalent guard in clusterJunction does.
    mergedBedpe = {}
    # NOTE(review): the message above reports F[2] + margin as the end of the
    # skipped range, while skip_pos is computed from F[1] -- confirm which is intended.
    skip_pos = int(F[1]) + check_margin_size

for key in sorted(mergedBedpe):

Expand Down
3 changes: 2 additions & 1 deletion genomon_sv/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ def genomonSV_parse(args):

# cluster read pairs possibly representing the same junction
utils.processingMessage("clustering improperly aligned read pairs")
parseFunction.clusterImproperBedpe(args.output_prefix + ".improper.bedpe", args.output_prefix + ".improper.clustered.unsort.bedpe", args.improper_check_margin_size)
parseFunction.clusterImproperBedpe(args.output_prefix + ".improper.bedpe", args.output_prefix + ".improper.clustered.unsort.bedpe",
args.improper_check_margin_size, args.improper_check_maximum_unique_pairs)

utils.processingMessage("sorting clustered improperly aligned read pairs")
utils.sortBedpe(args.output_prefix + ".improper.clustered.unsort.bedpe", args.output_prefix + ".improper.clustered.bedpe")
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

setup(
name = 'genomon_sv',
version = '0.6.0',
version = '0.6.1b1',
description='Python tools for detecting somatic structural variation from cancer genome sequencing data.',
url = 'https://github.com/friend1ws/GenomonSV',
author = 'Yuichi Shiraishi',
Expand Down

0 comments on commit bf7bcef

Please sign in to comment.