From 0057f95d5f768fa164f1c571deb1ae6423f593c4 Mon Sep 17 00:00:00 2001 From: DW Kim Date: Thu, 5 Oct 2023 15:12:50 +0900 Subject: [PATCH] cluster - uid labeling --- src/leb/main/EzAAI.java | 10 +++++++++- src/leb/process/ProcUPGMA.java | 10 ++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/leb/main/EzAAI.java b/src/leb/main/EzAAI.java index 85eea9c..51bf042 100644 --- a/src/leb/main/EzAAI.java +++ b/src/leb/main/EzAAI.java @@ -75,6 +75,7 @@ public EzAAI(String module) { String label = null; // convert, extract String input2 = null, mtxout = null; int thread = 10; double identity = 0.4, coverage = 0.5; // calculate int program = PROGRAM_MMSEQS; // calculate + boolean useid = false; // cluster private int parseArguments(String[] args) { String modstr = ""; @@ -171,6 +172,9 @@ else if(!arg.get("-s").equals("nucl")) { if(arg.get("-mtx") != null) mtxout = arg.get("-mtx"); if(arg.get("-t") != null) thread = Integer.parseInt(arg.get("-t")); } + if(module == MODULE_CLUSTER) { + if(arg.get("-u") != null) useid = true; + } if(arg.get("-prodigal") != null) path_prodigal = arg.get("-prodigal"); if(arg.get("-mmseqs") != null) path_mmseqs = arg.get("-mmseqs"); @@ -486,6 +490,7 @@ private int runCluster() { // parse input file Map imap = new HashMap<>(); + List ids = new ArrayList<>(); List labels = new ArrayList<>(); List bufs = new ArrayList<>(); try { @@ -498,10 +503,12 @@ private int runCluster() { String lab1 = buf.split("\t")[2], lab2 = buf.split("\t")[3]; if(!imap.containsKey(id1)) { imap.put(id1, labels.size()); + ids.add(id1); labels.add(lab1); } if(!imap.containsKey(id2)) { imap.put(id2, labels.size()); + ids.add(id2); labels.add(lab2); } } @@ -551,7 +558,7 @@ private int runCluster() { Prompt.print("AAI matrix identified. Running hierarchical clustering with UPGMA method..."); // produce UPGMA tree - ProcUPGMA upgma = new ProcUPGMA(dmat, labels); + ProcUPGMA upgma = new ProcUPGMA(dmat, ids, labels, useid); BufferedWriter bw = new BufferedWriter(new FileWriter(output)); bw.write(upgma.getTree() + "\n"); bw.close(); @@ -741,6 +748,7 @@ private static void printHelp(int module) { System.out.println(ANSIHandler.wrapper(" Argument\tDescription", 'c')); System.out.printf(" %s\t\t%s%n", "-i", "Input EzAAI result file containing all-by-all pairwise AAI values"); System.out.printf(" %s\t\t%s%n", "-o", "Output result file"); + System.out.printf(" %s\t\t%s%n", "-u", "Use ID instead of label for tree"); System.out.println(); } } diff --git a/src/leb/process/ProcUPGMA.java b/src/leb/process/ProcUPGMA.java index 2dab817..22dd66e 100644 --- a/src/leb/process/ProcUPGMA.java +++ b/src/leb/process/ProcUPGMA.java @@ -11,9 +11,15 @@ public class ProcUPGMA { private final List counts = new LinkedList<>(); private int N; - public ProcUPGMA(double[][] dmat, List labels) { + public ProcUPGMA(double[][] dmat, List ids, List labels, boolean useid) { this.dmat = dmat; - this.leaves.addAll(labels); + if(useid) { + for(int id : ids) { + this.leaves.add(String.format("%d", id)); + } + } else { + this.leaves.addAll(labels); + } N = labels.size(); for(int i = 0; i < N; i++) {