Skip to content

Commit

Permalink
#17 - report matching pairs
Browse files Browse the repository at this point in the history
  • Loading branch information
endixk committed Oct 23, 2023
1 parent 0057f95 commit c190ee0
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 31 deletions.
45 changes: 28 additions & 17 deletions src/leb/main/EzAAI.java
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public EzAAI(String module) {
path_ufasta = "ufasta";

String label = null; // convert, extract
String input2 = null, mtxout = null; int thread = 10; double identity = 0.4, coverage = 0.5; // calculate
String input2 = null, matchout = null, mtxout = null; int thread = 10; double identity = 0.4, coverage = 0.5; // calculate
int program = PROGRAM_MMSEQS; // calculate
boolean useid = false; // cluster

Expand Down Expand Up @@ -169,6 +169,7 @@ else if(!arg.get("-s").equals("nucl")) {
}
if(arg.get("-id") != null) identity = Double.parseDouble(arg.get("-id"));
if(arg.get("-cov") != null) coverage = Double.parseDouble(arg.get("-cov"));
if(arg.get("-match") != null) matchout = arg.get("-match");
if(arg.get("-mtx") != null) mtxout = arg.get("-mtx");
if(arg.get("-t") != null) thread = Integer.parseInt(arg.get("-t"));
}
Expand Down Expand Up @@ -412,7 +413,14 @@ else if(!faaDir.isDirectory()) {
br.close();
(new File("mm.label")).delete();
}


// prepare match output
BufferedWriter maw = null;
if(matchout != null) {
maw = new BufferedWriter(new FileWriter(matchout));
maw.write("ID 1\tID 2\tLabel 1\tLabel 2\tCDS 1\tCDS 2\tForward\tBackward\tAverage\n");
}

// run MMSeqs for each fasta pairs
Double[][] aaiTable = new Double[ilist.size()][jlist.size()];
Integer[][] hitTable = new Integer[ilist.size()][jlist.size()];
Expand All @@ -422,9 +430,9 @@ else if(!faaDir.isDirectory()) {
for(int j = 0; j < jlist.size(); j++) {
Prompt.print(String.format("Calculating AAI... [Task %d/%d]", i*jlist.size()+j+1, ilist.size()*jlist.size()));
ProcCalcPairwiseAAI procAAI = new ProcCalcPairwiseAAI();

switch(program) {
case PROGRAM_MMSEQS:
case PROGRAM_MMSEQS:
procAAI.setPath(path_mmseqs);
procAAI.setMode(ProcCalcPairwiseAAI.MODE_MMSEQS);
break;
Expand All @@ -441,13 +449,15 @@ else if(!faaDir.isDirectory()) {
procAAI.setNthread(thread);
procAAI.setIdentity(identity);
procAAI.setCoverage(coverage);
List<String> res = procAAI.calculateProteomePairWithDetails(ilist.get(i), jlist.get(j));
if (maw != null) procAAI.setMatchout(maw);
List<String> res = procAAI.calculateProteomePairWithDetails(ilabs.get(i), jlabs.get(j), ilist.get(i), jlist.get(j));
hitTable[i][j] = Integer.parseInt(res.get(4));
aaiTable[i][j] = Double.parseDouble(res.get(6));
if(j == 0) ilens[i] = Integer.parseInt(res.get(0));
if(i == 0) jlens[j] = Integer.parseInt(res.get(1));
}
}
if(maw != null) maw.close();

// print result into output file
BufferedWriter bw = new BufferedWriter(new FileWriter(output, outExists));
Expand Down Expand Up @@ -484,7 +494,7 @@ else if(!faaDir.isDirectory()) {
Prompt.print("Task finished.");
return 0;
}

private int runCluster() {
Prompt.debug("EzAAI - cluster module");

Expand Down Expand Up @@ -718,22 +728,23 @@ private static void printHelp(int module) {

System.out.println(ANSIHandler.wrapper("\n Required options", 'Y'));
System.out.println(ANSIHandler.wrapper(" Argument\tDescription", 'c'));
System.out.printf(" %s\t\t%s%n", "-i", "First input protein DB / directory with protein DBs");
System.out.printf(" %s\t\t%s%n", "-j", "Second input protein DB / directory with protein DBs");
System.out.printf(" %s\t\t%s%n", "-o", "Output result file");
System.out.printf(" %s\t%s%n", "-i ", "First input protein DB / directory with protein DBs");
System.out.printf(" %s\t%s%n", "-j ", "Second input protein DB / directory with protein DBs");
System.out.printf(" %s\t%s%n", "-o ", "Output result file");
System.out.println();

System.out.println(ANSIHandler.wrapper("\n Additional options", 'y'));
System.out.println(ANSIHandler.wrapper(" Argument\tDescription", 'c'));
System.out.printf(" %s\t\t%s%n", "-p", "Customize calculation program [mmseqs / diamond / blastp] (default: mmseqs)");
System.out.printf(" %s\t\t%s%n", "-t", "Number of CPU threads to use (default: 10)");
System.out.printf(" %s\t\t%s%n", "-id", "Minimum identity threshold for AAI calculations [0 - 1.0] (default: 0.4)");
System.out.printf(" %s\t\t%s%n", "-cov", "Minimum query coverage threshold for AAI calculations [0 - 1.0] (default: 0.5)");
System.out.printf(" %s\t\t%s%n", "-mtx", "Matrix Market formatted output");
System.out.printf(" %s\t%s%n", "-mmseqs", "Custom path to MMSeqs2 binary (default: mmseqs)");
System.out.printf(" %s\t%s%n", "-p ", "Customize calculation program [mmseqs / diamond / blastp] (default: mmseqs)");
System.out.printf(" %s\t%s%n", "-t ", "Number of CPU threads to use (default: 10)");
System.out.printf(" %s\t%s%n", "-id ", "Minimum identity threshold for AAI calculations [0 - 1.0] (default: 0.4)");
System.out.printf(" %s\t%s%n", "-cov ", "Minimum query coverage threshold for AAI calculations [0 - 1.0] (default: 0.5)");
System.out.printf(" %s\t%s%n", "-match ", "Path to write a result of matched CDS names");
System.out.printf(" %s\t%s%n", "-mtx ", "Path to write a Matrix Market formatted output");
System.out.printf(" %s\t%s%n", "-mmseqs ", "Custom path to MMSeqs2 binary (default: mmseqs)");
System.out.printf(" %s\t%s%n", "-diamond", "Custom path to DIAMOND binary (default: diamond)");
System.out.printf(" %s\t%s%n", "-blastp", "Custom path to BLASTp+ binary (default: blastp)");
System.out.printf(" %s\t%s%n", "-makeblastdb", "Custom path to makeblastdb binary (default: makeblastdb)");
System.out.printf(" %s\t%s%n", "-blastp ", "Custom path to BLASTp+ binary (default: blastp)");
System.out.printf(" %s\t%s%n", "-blastdb", "Custom path to makeblastdb binary (default: makeblastdb)");
System.out.println();
}
if(module == MODULE_CLUSTER) {
Expand Down
54 changes: 40 additions & 14 deletions src/leb/process/ProcCalcPairwiseAAI.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package leb.process;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
Expand Down Expand Up @@ -38,21 +39,25 @@ public class ProcCalcPairwiseAAI {
private double identity = 40.0, coverage = 0.5;
private String path = null;
public void setPath(String path) {this.path = path;}

private String dbpath = null; // makeblastdb
public void setDbpath(String dbpath) {this.dbpath = dbpath;}

public void setIdentity(double identity) {
this.identity = identity * 100;
}
public void setCoverage(double coverage) {
this.coverage = coverage;
}
private BufferedWriter maw = null;
public void setMatchout(BufferedWriter maw) {
this.maw = maw;
}
private String label1 = null, label2 = null;

public ProcCalcPairwiseAAI() {}

// returns [cds1, cds2, hit1, hit2, recHit, avglen, aai]
public List<String> calculateProteomePairWithDetails(String faa1, String faa2) throws IOException {
public List<String> calculateProteomePairWithDetails(String label1, String label2, String faa1, String faa2) throws IOException {
this.label1 = label1; this.label2 = label2;
List<String> res = new ArrayList<>();
// Switch mode
switch(mode){
Expand All @@ -66,10 +71,9 @@ public List<String> calculateProteomePairWithDetails(String faa1, String faa2) t
return res;
}

private void mapLength(BufferedReader br1, BufferedReader br2,
Map<String, Integer> lmap,
Map<String, Integer> nmap1,
Map<String, Integer> nmap2) throws IOException {
private void mapLength(BufferedReader br1, BufferedReader br2, Map<String, Integer> lmap,
Map<String, Integer> nmap1, Map<String, Integer> nmap2,
List<String> nlist1, List<String> nlist2) throws IOException {
int n1 = 0, n2 = 0;

String buf;
Expand All @@ -81,6 +85,7 @@ private void mapLength(BufferedReader br1, BufferedReader br2,

lmap.put(id, (ed - st - 2) / 3);
nmap1.put(id, n1++);
nlist1.add(id);
}
while((buf = br2.readLine()) != null){
if(!buf.startsWith(">")) continue;
Expand All @@ -90,6 +95,7 @@ private void mapLength(BufferedReader br1, BufferedReader br2,

lmap.put(id, (ed - st - 2) / 3);
nmap2.put(id, n2++);
nlist2.add(id);
}
}

Expand Down Expand Up @@ -151,10 +157,13 @@ private List<String> calcIdentityWithDetails(
List<Blast6FormatHitDomain> hits_versa,
Map<String, Integer> lengthMap,
Map<String, Integer> nameMap1,
Map<String, Integer> nameMap2) {
Map<String, Integer> nameMap2,
List<String> nameList1,
List<String> nameList2) {
List<String> res = new ArrayList<>();
int fac = (mode == MODE_MMSEQS ? 100 : 1);
int n1 = nameMap1.size(), n2 = nameMap2.size();
Prompt.debug(String.format("n1 = %d, n2 = %d, l1 = %d, l2= %d", n1, n2, nameList1.size(), nameList2.size()));
res.add(String.valueOf(n2));
res.add(String.valueOf(n1));

Expand Down Expand Up @@ -192,6 +201,17 @@ private List<String> calcIdentityWithDetails(
nval++;
isum += viceMatrix[i][j] + versaMatrix[i][j];
lsum += viceLength[i][j] + versaLength[i][j];
if(maw != null) {
try {
maw.write(String.format("%d\t%d\t%s\t%s\t%s\t%s\t%.3f\t%.3f\t%.3f\n",
Math.abs(label1.hashCode()) % (1<<30), Math.abs(label2.hashCode()) % (1<<30),
label1, label2, nameList1.get(j), nameList2.get(i),
viceMatrix[i][j], versaMatrix[i][j], (viceMatrix[i][j] + versaMatrix[i][j]) / 2));
} catch(IOException e) {
Prompt.error("FATAL ERROR : Failed to write match output.");
return null;
}
}
}
/* else if(viceMatrix[i][j] >= 40.0 || versaMatrix[i][j] >= 40.0) {
Prompt.debug(String.format("Non-reciprocal hit : %.3f / %.3f", viceMatrix[i][j], versaMatrix[i][j]));
Expand Down Expand Up @@ -221,8 +241,10 @@ private List<String> pairwiseBlastp(String faa1, String faa2) throws IOException
Map<String, Integer> lengthMap = new HashMap<>();
Map<String, Integer> nameMap1 = new HashMap<>(),
nameMap2 = new HashMap<>();
List<String> nameList1 = new ArrayList<>(),
nameList2 = new ArrayList<>();

mapLength(br1, br2, lengthMap, nameMap1, nameMap2);
mapLength(br1, br2, lengthMap, nameMap1, nameMap2, nameList1, nameList2);
br1.close(); br2.close();

// Run pairwise BLASTp
Expand Down Expand Up @@ -257,7 +279,7 @@ private List<String> pairwiseBlastp(String faa1, String faa2) throws IOException
}

// Collect pairs with reciprocal hits with id 40%+, q_cov 50%+
return calcIdentityWithDetails(hits_vice, hits_versa, lengthMap, nameMap1, nameMap2);
return calcIdentityWithDetails(hits_vice, hits_versa, lengthMap, nameMap1, nameMap2, nameList1, nameList2);
}
/*
private double pairwiseUsearch(String faa1, String faa2) throws IOException {
Expand Down Expand Up @@ -313,8 +335,10 @@ private List<String> pairwiseMmseqs(String faa1, String faa2) throws IOException
Map<String, Integer> lengthMap = new HashMap<>();
Map<String, Integer> nameMap1 = new HashMap<>(),
nameMap2 = new HashMap<>();
List<String> nameList1 = new ArrayList<>(),
nameList2 = new ArrayList<>();

mapLength(br1, br2, lengthMap, nameMap1, nameMap2);
mapLength(br1, br2, lengthMap, nameMap1, nameMap2, nameList1, nameList2);
br1.close(); br2.close();

// Run MMSeqs2 reciprocal best hit search
Expand Down Expand Up @@ -375,7 +399,7 @@ else if(!mmout.isDirectory()) {
}

// Collect pairs with reciprocal hits with id 40%+, q_cov 50%+
return calcIdentityWithDetails(hits_vice, hits_versa, lengthMap, nameMap2, nameMap1);
return calcIdentityWithDetails(hits_vice, hits_versa, lengthMap, nameMap2, nameMap1, nameList1, nameList2);
}

private List<String> pairwiseDiamond(String faa1, String faa2, boolean sensitive) throws IOException {
Expand All @@ -385,8 +409,10 @@ private List<String> pairwiseDiamond(String faa1, String faa2, boolean sensitive
Map<String, Integer> lengthMap = new HashMap<>();
Map<String, Integer> nameMap1 = new HashMap<>(),
nameMap2 = new HashMap<>();
List<String> nameList1 = new ArrayList<>(),
nameList2 = new ArrayList<>();

mapLength(br1, br2, lengthMap, nameMap1, nameMap2);
mapLength(br1, br2, lengthMap, nameMap1, nameMap2, nameList1, nameList2);
br1.close(); br2.close();

// Run MMSeqs2 reciprocal best hit search
Expand Down Expand Up @@ -423,6 +449,6 @@ else if(!dmout.isDirectory()) {
}

// Collect pairs with reciprocal hits with id 40%+, q_cov 50%+
return calcIdentityWithDetails(hits_vice, hits_versa, lengthMap, nameMap2, nameMap1);
return calcIdentityWithDetails(hits_vice, hits_versa, lengthMap, nameMap2, nameMap1, nameList1, nameList2);
}
}

0 comments on commit c190ee0

Please sign in to comment.