Skip to content

Commit

Permalink
lib: add log messages in protein builder, #TASK-5776, #TASK-5564
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Aug 2, 2024
1 parent efa4824 commit 4326fa3
Showing 1 changed file with 17 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,20 @@ public void parse() throws CellBaseException, IOException {
}

logger.info(PARSING_LOG_MESSAGE, interProFiles.get(0));
String interproName = getDataName(INTERPRO_DATA);
int numLine = 0;
int numInterProLinesProcessed = 0;
int numUniqueProteinsProcessed = 0;
try (BufferedReader interproBuffereReader = FileUtils.newBufferedReader(interProFiles.get(0).toPath())) {

Set<String> hashSet = proteinMap.keySet();
Set<String> visited = new HashSet<>(proteinMap.size());

int numInterProLinesProcessed = 0;
int numUniqueProteinsProcessed = 0;
String[] fields;
String line;
boolean iprAdded;
while ((line = interproBuffereReader.readLine()) != null) {
numLine++;
fields = line.split("\t");

if (hashSet.contains(fields[0])) {
Expand Down Expand Up @@ -183,24 +187,21 @@ public void parse() throws CellBaseException, IOException {
visited.add(fields[0]);
numUniqueProteinsProcessed++;
}
} else {
logger.info("{} not found in protein map", fields[0]);
}

if (++numInterProLinesProcessed % 10000000 == 0) {
logger.info("{} {} lines processed", numInterProLinesProcessed, getDataName(INTERPRO_DATA));
logger.info("{} {} unique proteins processed", getDataName(INTERPRO_DATA), numUniqueProteinsProcessed);
printInfoLogs(numInterProLinesProcessed, numUniqueProteinsProcessed, interproName);
}
}
logger.info("{} {} lines processed", numInterProLinesProcessed, getDataName(INTERPRO_DATA));
logger.info("{} {} unique proteins processed", getDataName(INTERPRO_DATA), numUniqueProteinsProcessed);
printInfoLogs(numInterProLinesProcessed, numUniqueProteinsProcessed, interproName);

logger.info(PARSING_DONE_LOG_MESSAGE);
} catch (IOException e) {
throw new CellBaseException("Error parsing " + getDataName(INTERPRO_DATA) + " file: " + interProFiles.get(0), e);
logger.error("Error parsing {} file: {}. Num. line = {}. Error stack trace = {}", interproName, interProFiles.get(0),
numLine, Arrays.toString(e.getStackTrace()));
printInfoLogs(numInterProLinesProcessed, numUniqueProteinsProcessed, interproName);
}


// Serialize and save results
RocksIterator rocksIterator = rocksDb.newIterator();
for (rocksIterator.seekToFirst(); rocksIterator.isValid(); rocksIterator.next()) {
Expand Down Expand Up @@ -279,4 +280,10 @@ private void splitUniprot(Path uniprotFilePath, Path splitOutdirPath) throws IOE
/**
 * Builds the error message used when the number of files found for a data source is not exactly one.
 *
 * @param dataName Name of the data source, inserted verbatim into the message
 * @param numFiles Number of files that were actually found
 * @return The message "Only one {dataName} file is expected, but currently there are {numFiles} files"
 */
private String getMismatchNumFilesErrorMessage(String dataName, int numFiles) {
    StringBuilder message = new StringBuilder("Only one ");
    message.append(dataName);
    message.append(" file is expected, but currently there are ");
    message.append(numFiles);
    message.append(" files");
    return message.toString();
}

/**
 * Writes two info-level log entries reporting the current progress counters for a data source:
 * first the number of lines processed, then the number of unique proteins processed.
 *
 * @param numInterProLinesProcessed  Number of lines processed so far
 * @param numUniqueProteinsProcessed Number of unique proteins processed so far
 * @param dataName                   Name of the data source, used as the prefix of both entries
 */
private void printInfoLogs(int numInterProLinesProcessed, int numUniqueProteinsProcessed, String dataName) {
    final String linesTemplate = "{}: {} lines processed";
    final String proteinsTemplate = "{}: {} unique proteins processed";

    logger.info(linesTemplate, dataName, numInterProLinesProcessed);
    logger.info(proteinsTemplate, dataName, numUniqueProteinsProcessed);
}

}

0 comments on commit 4326fa3

Please sign in to comment.