-
Notifications
You must be signed in to change notification settings - Fork 53
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'release-6.2.x' into TASK-6515
- Loading branch information
Showing
22 changed files
with
499 additions
and
76 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
name: Manual Junit test the project | ||
|
||
on: | ||
workflow_dispatch: | ||
inputs: | ||
branch: | ||
description: 'Opencga branch to run the tests' | ||
default: 'develop' | ||
required: true | ||
fail-never: | ||
type: boolean | ||
description: 'The process executes all tests even if some fail.' | ||
default: false | ||
required: false | ||
|
||
jobs: | ||
test: | ||
name: JUnit Test | ||
runs-on: ubuntu-22.04 | ||
steps: | ||
- uses: actions/checkout@v4 | ||
with: | ||
ref: ${{ github.event.inputs.branch }} | ||
fetch-depth: '0' | ||
- name: Set up JDK 11 | ||
uses: actions/setup-java@v4 | ||
with: | ||
distribution: 'temurin' | ||
java-version: '11' | ||
cache: 'maven' | ||
- name: K8s Tunnel MongoDB | ||
run: | | ||
wget https://dl.k8s.io/release/v1.28.2/bin/linux/amd64/kubectl | ||
chmod +x ./kubectl | ||
echo "${{ secrets.AZURE_KUBE_CONFIG }}" > admin.conf | ||
./kubectl -n cellbase-db port-forward services/cellbase-rs0-svc 27017:27017 --kubeconfig ./admin.conf & | ||
- name: Install dependencies branches | ||
run: | | ||
if [ -f "./.github/workflows/scripts/get_same_branch.sh" ]; then | ||
chmod +x ./.github/workflows/scripts/get_same_branch.sh | ||
./.github/workflows/scripts/get_same_branch.sh ${{ github.ref_name }} | ||
else | ||
echo "./.github/workflows/scripts/get_same_branch.sh does not exist." | ||
fi | ||
- name: Test and Analyze | ||
env: | ||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information, if any | ||
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} | ||
run: | | ||
FAIL_NEVER="" | ||
if [ "${{ github.event.inputs.fail-never }}" == "true" ]; then | ||
FAIL_NEVER="--fail-never" | ||
fi | ||
mvn install surefire-report:report ${FAIL_NEVER} -Dcheckstyle.skip |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
name: Pull request approve workflow | ||
|
||
on: | ||
pull_request_review: | ||
types: [ submitted ] | ||
|
||
jobs: | ||
build: | ||
uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop | ||
|
||
test: | ||
name: "Test analysis" | ||
uses: ./.github/workflows/test-analysis.yml | ||
needs: build | ||
secrets: inherit |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
gnomAD Mitochondrial DNA (mtDNA) variants v3.1: | ||
URL: https://storage.googleapis.com/gcp-public-data--gnomad/release/3.1/vcf/genomes/gnomad.genomes.v3.1.sites.chrM.vcf.bgz | ||
|
||
Mapping file in ticket BIOINFO-99: mapping_file_gnomad_mt_mod_file.txt | ||
|
||
Script to preprocess original VCF from gnomad: gnomad_mt.py | ||
|
||
Script to load gnomad mt variants into OpenCGA and export them in json format annotation.populationFrequencies object: opencga_gnomad_mt.sh | ||
|
||
|
120 changes: 120 additions & 0 deletions
120
cellbase-app/app/scripts/gnomad/mitochondrial/gnomad_mt.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
import sys | ||
import gzip | ||
|
||
|
||
POPULATIONS = ['afr', 'ami', 'amr', 'asj', 'eas', 'fin', 'nfe', 'oth', 'sas', 'mid'] | ||
HEADER_COMMON = [ | ||
'##INFO=<ID=AC,Number=1,Type=Integer,Description="Calculated allele count">', | ||
'##INFO=<ID=AF,Number=1,Type=Float,Description="Calculated allele frequency">', | ||
'##INFO=<ID=GTC,Number=1,Type=String,Description="Calculated list of genotype counts (0/0,0/1,1/1)">' | ||
] | ||
HEADER_POP = [ | ||
'##INFO=<ID=AF_{pop},Number=1,Type=Float,Description="Calculated allele frequency for {pop} population">', | ||
'##INFO=<ID=AC_{pop},Number=1,Type=Integer,Description="Calculated allele count for {pop} population">', | ||
'##INFO=<ID=AN_{pop},Number=1,Type=Integer,Description="Calculated allele number for {pop} population">', | ||
'##INFO=<ID=GTC_{pop},Number=1,Type=String,Description="Calculated list of genotype counts for {pop} population (0/0,0/1,1/1)">' | ||
] | ||
|
||
|
||
def main(): | ||
|
||
# Creating custom header | ||
custom_header = [] | ||
custom_header += HEADER_COMMON | ||
for pop in POPULATIONS: | ||
custom_header += ['\n'.join(HEADER_POP).format(pop=pop)] | ||
custom_header = '\n'.join(custom_header) + '\n' | ||
|
||
# Opening input/output files | ||
vcf_input_fpath = sys.argv[1] | ||
vcf_output_fpath = sys.argv[2] | ||
vcf_input_fhand = gzip.open(vcf_input_fpath, 'r') | ||
vcf_output_fhand = gzip.open(vcf_output_fpath, 'wt') | ||
|
||
# Calculating new INFO fields for each variant | ||
for line in vcf_input_fhand: | ||
line = line.decode() | ||
|
||
# Writing header to output | ||
if line.startswith('##VEP'): # adding custom header before "##VEP" line | ||
vcf_output_fhand.write(custom_header) | ||
vcf_output_fhand.write(line) | ||
continue | ||
if line.startswith('#'): | ||
vcf_output_fhand.write(line) | ||
continue | ||
|
||
# Dict to store the new calculated data | ||
new_info = {} | ||
|
||
# Getting variant and INFO data | ||
variant_items = line.strip().split() | ||
info_items = variant_items[7].split(';') | ||
|
||
for info_item in info_items: | ||
|
||
# Getting key/value for each INFO item | ||
if len(info_item.split('=', maxsplit=1)) < 2: # skipping flags | ||
continue | ||
info_key, info_value = info_item.split('=', maxsplit=1) | ||
|
||
# Getting INFO data for calculations | ||
if info_key == 'pop_AF_hom': | ||
pop_AF_hom = list(map(float, info_value.split('|'))) | ||
if info_key == 'pop_AF_het': | ||
pop_AF_het = list(map(float, info_value.split('|'))) | ||
if info_key == 'AF_hom': | ||
AF_hom = float(info_value) | ||
if info_key == 'AF_het': | ||
AF_het = float(info_value) | ||
if info_key == 'pop_AC_hom': | ||
pop_AC_hom = list(map(int, info_value.split('|'))) | ||
if info_key == 'pop_AC_het': | ||
pop_AC_het = list(map(int, info_value.split('|'))) | ||
if info_key == 'AC_hom': | ||
AC_hom = int(info_value) | ||
if info_key == 'AC_het': | ||
AC_het = int(info_value) | ||
if info_key == 'pop_AN': | ||
pop_AN = list(map(int, info_value.split('|'))) | ||
if info_key == 'AN': | ||
AN = int(info_value) | ||
|
||
# Calculating AF_{pop} and AF | ||
# e.g. AF_sas = pop_AF_hom[i] + pop_AF_het[i] (i = index of sas population) | ||
pop_AF = [x + y for x, y in zip(pop_AF_hom, pop_AF_het)] | ||
for i, pop in enumerate(POPULATIONS): | ||
new_info['AF_' + pop] = pop_AF[i] | ||
new_info['AF'] = AF_hom + AF_het | ||
|
||
# Calculating AC_{pop} and AC | ||
# e.g. AC_sas = pop_AC_hom[i] + pop_AC_het[i] (i = index of sas population) | ||
pop_AC = [x + y for x, y in zip(pop_AC_hom, pop_AC_het)] | ||
for i, pop in enumerate(POPULATIONS): | ||
new_info['AC_' + pop] = pop_AC[i] | ||
new_info['AC'] = AC_hom + AC_het | ||
|
||
# Calculating AN_{pop} | ||
# e.g. AN_sas = pop_AN[i] (i = index of sas population) | ||
for i, pop in enumerate(POPULATIONS): | ||
new_info['AN_' + pop] = pop_AN[i] | ||
|
||
# Calculating GTC_{pop} | ||
# e.g. GTC_sas = (pop_AN[i] - (pop_AC_het[i] + pop_AC_hom[i])) + "," + pop_AC_het[i] + "," + pop_AC_hom[i] | ||
pop_AC = [x + y for x, y in zip(pop_AC_hom, pop_AC_het)] | ||
hom_ref = [x - y for x, y in zip(pop_AN, pop_AC)] | ||
for i, pop in enumerate(POPULATIONS): | ||
new_info['GTC_' + pop] = ','.join(map(str, [hom_ref[i], pop_AC_het[i], pop_AC_hom[i]])) | ||
new_info['GTC'] = ','.join(map(str, [AN - (AC_hom + AC_het), AC_het, AC_hom])) | ||
|
||
# Joining existing INFO field and new custom INFO data | ||
custom_info_data = ';'.join(['='.join([k, str(new_info[k])]) for k in new_info]) | ||
new_info_field = ';'.join(info_items + [custom_info_data]) | ||
|
||
# Replacing original INFO field | ||
variant_items[7] = new_info_field | ||
vcf_output_fhand.write('\t'.join(variant_items) + '\n') | ||
|
||
|
||
if __name__ == '__main__': | ||
sys.exit(main()) |
44 changes: 44 additions & 0 deletions
44
cellbase-app/app/scripts/gnomad/mitochondrial/opencga_gnomad_mt.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#!/bin/bash | ||
|
||
# Variables | ||
user="user" | ||
host="host_name" | ||
project="population" | ||
project_name="Population" | ||
study="gnomad_mt" | ||
study_name="gnomAD v3.1 Mitocondrial DNA Variants" | ||
study_path="data/"$study | ||
folder_path="/home/gnomad_mt" | ||
mapping_file="mapping_file_gnomad_mt_mod_file.txt" | ||
vcf_file="gnomad.genomes.v3.1.sites.chrM.mod.vcf.gz" | ||
mapping_file_path=$folder_path$mapping_file | ||
vcf_file_path=$folder_path$vcf_file | ||
|
||
# Login | ||
/home/opencga-client-2.12.0/bin/opencga.sh login $user --host $host | ||
|
||
# Project creation | ||
/home/opencga-client-2.12.0/bin/opencga.sh projects create --id $project --name $project_name --organism-scientific-name hsapiens --organism-assembly grch38 --host $host | ||
|
||
# Study creation | ||
/home/opencga-client-2.12.0/bin/opencga.sh studies create --id $study --name $study_name --project $project --host $host | ||
|
||
# Folders creation within Catalog | ||
/home/opencga-client-2.12.0/bin/opencga.sh files create --path $study_path --parents --study $study --type DIRECTORY --host $host | ||
|
||
# Uploading gnomad mt variants VCF and mapping file for gnomad mt variants | ||
/home/opencga-client-2.12.0/bin/opencga.sh files upload -i $mapping_file_path --path $study_path --study $study --host $host | ||
|
||
/home/opencga-client-2.12.0/bin/opencga.sh files upload -i $vcf_file_path --path $study_path --study $study --host $host | ||
|
||
# Variant index for gnomad mt variants VCF | ||
/home/opencga-client-2.12.0/bin/opencga.sh operations variant-index --study $study --file $vcf_file --load-archive NO --load-split-data CHROMOSOME --host $host | ||
|
||
# Variant stats index for gnomad mt variants. The corresponding cohorts and variant cohort stats will be generated using the information of interest provided in the mapping file and INFO column of the gnomad mt VCF | ||
/home/opencga-client-2.12.0/bin/opencga.sh operations variant-stats-index --study $study --aggregation-mapping-file $mapping_file --aggregated BASIC --host $host | ||
|
||
# Variant cohort stats will be converted to population frequencies data model (julie-tool) | ||
/home/opencga-client-2.12.0/bin/opencga.sh operations variant-julie-run --project $project --host $host | ||
|
||
# Export of annotation.populationFrequencies in json format | ||
/home/opencga-client-2.12.0/bin/opencga.sh variant export-run --body_include annotation.populationFrequencies --body_project $project --project $project --output-file-format json --host $host |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright 2015-2020 OpenCB | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import argparse | ||
import os | ||
import requests | ||
import sys | ||
import json | ||
import pathlib | ||
from pathlib import Path | ||
|
||
|
||
## Configure command-line options | ||
parser = argparse.ArgumentParser() | ||
parser.add_argument('-i', help="VCF file", required=True) | ||
|
||
|
||
## Parse command-line parameters and init basedir, tag and build_folder | ||
args = parser.parse_args() | ||
print(args.i) | ||
|
||
if os.path.isfile(args.i) == False: | ||
print("no existe") | ||
|
||
|
||
# Opening file | ||
vcf_file = open(args.i, 'r') | ||
count = 0 | ||
|
||
# Using for loop | ||
print("Using for loop") | ||
for line in vcf_file: | ||
count += 1 | ||
if not line.startswith("#"): | ||
line = line.strip() | ||
cols = line.split("\t") | ||
print(line) | ||
info_cols = cols[7].split(";") | ||
var = [x for x in info_cols if x.startswith("AN=")] | ||
print("{}".format(var)) | ||
|
||
|
||
# Closing files | ||
vcf_file.close() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.