Skip to content

Commit

Permalink
Make TextCache a Spring bean
Browse files Browse the repository at this point in the history
  • Loading branch information
ato committed Jul 31, 2023
1 parent 9fe6aff commit ac1e0f2
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 21 deletions.
10 changes: 7 additions & 3 deletions ui/src/bamboo/api/DataApiController.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
import bamboo.crawl.Crawl;
import bamboo.crawl.Warc;
import bamboo.crawl.WarcsController;
import bamboo.task.TextCache;
import jakarta.servlet.http.HttpServletRequest;
import jakarta.servlet.http.HttpServletResponse;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.core.io.PathResource;
Expand All @@ -27,14 +29,16 @@
public class DataApiController {
private final Bamboo wa;
private final WarcsController warcsController;
private final TextCache textCache;
@Value("${data_api.allowed_series_ids}")
private Set<Long> allowedSeriesIds;
@Value("${data_api.credentials}")
private Set<String> credentials;

public DataApiController(Bamboo wa, WarcsController warcsController) {
public DataApiController(Bamboo wa, WarcsController warcsController, @Autowired(required = false) TextCache textCache) {
this.wa = wa;
this.warcsController = warcsController;
this.textCache = textCache;
}

public static class MissingCredentialsException extends Exception {
Expand Down Expand Up @@ -130,13 +134,13 @@ public PathResource getText(@PathVariable long warcId,
HttpServletRequest request,
HttpServletResponse response) throws AccessDeniedException, MissingCredentialsException {
enforceAgwaCredentials(request);
if (warcsController.textCache == null) {
if (textCache == null) {
throw new NotFoundException("Text cache is disabled");
}
var warc = wa.warcs.get(warcId);
var crawl = wa.crawls.get(warc.getCrawlId());
enforceAgwaCrawl(crawl);
var textPath = warcsController.textCache.find(warcId);
var textPath = textCache.find(warcId);
if (textPath == null) {
throw new NotFoundException("No text for warc " + warcId);
}
Expand Down
4 changes: 2 additions & 2 deletions ui/src/bamboo/app/CLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,12 @@ public static void main(String[] args) throws IOException {
endId = Long.parseLong(args[3]);
}
}
new TextCache(Paths.get(args[1]), bamboo.warcs, bamboo.textExtractor).populateAll(startId, endId);
new TextCache(Paths.get(args[1]), bamboo).populateAll(startId, endId);
break;
}

case "build-text-cache-series": {
new TextCache(Paths.get(args[1]), bamboo.warcs, bamboo.textExtractor).populateSeries(Long.parseLong(args[2]));
new TextCache(Paths.get(args[1]), bamboo).populateSeries(Long.parseLong(args[2]));
break;
}

Expand Down
16 changes: 3 additions & 13 deletions ui/src/bamboo/crawl/WarcsController.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import org.netpreserve.jwarc.WarcReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpStatus;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.security.crypto.codec.Hex;
Expand Down Expand Up @@ -53,20 +54,9 @@ public class WarcsController {
final Bamboo wa;
public final TextCache textCache;

public WarcsController(Bamboo wa) {
public WarcsController(Bamboo wa, @Autowired(required = false) TextCache textCache) {
this.wa = Objects.requireNonNull(wa);
String textCachePath = System.getenv("WARC_TEXT_CACHE");
if (textCachePath != null) {
log.info("WARC_TEXT_CACHE: {}", textCachePath);
Path root = Paths.get(textCachePath);
if (!Files.exists(root)) {
throw new RuntimeException("WARC_TEXT_CACHE not found: " + textCachePath);
}
textCache = new TextCache(root, wa.warcs, wa.textExtractor);
} else {
log.info("WARC_TEXT_CACHE disabled");
textCache = null;
}
this.textCache = textCache;
}

static class Range {
Expand Down
18 changes: 15 additions & 3 deletions ui/src/bamboo/task/TextCache.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
package bamboo.task;

import bamboo.app.Bamboo;
import bamboo.crawl.Warc;
import bamboo.crawl.Warcs;
import io.swagger.v3.oas.annotations.servers.Server;
import org.archive.io.ArchiveReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.stereotype.Component;
import org.springframework.stereotype.Service;

import java.io.IOException;
import java.io.OutputStream;
Expand All @@ -19,16 +25,22 @@
import static java.nio.file.StandardCopyOption.ATOMIC_MOVE;
import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;

@Component
@ConditionalOnProperty("WARC_TEXT_CACHE")
public class TextCache {
// Logger is keyed to TextCache itself (previously CdxCache.class, a copy-paste slip
// that filed this class's messages under the wrong logger category).
private static final Logger log = LoggerFactory.getLogger(TextCache.class);
private final Path root;
private Warcs warcs;
private final TextExtractor extractor;

public TextCache(Path root, Warcs warcs, TextExtractor textExtractor) {
public TextCache(@Value("${WARC_TEXT_CACHE}") Path root, Bamboo wa) throws IOException {
this.root = root;
this.warcs = warcs;
extractor = textExtractor;
this.warcs = wa.warcs;
extractor = wa.textExtractor;
log.info("TextCache at {}", root);
// Create the cache root (and any missing parent directories) when it is absent.
// The check must be negated: creating the directory only when the path already
// exists meant a missing root was never created.
if (!Files.exists(root)) {
    Files.createDirectories(root);
}
}

public Path entryPath(long warcId) {
Expand Down

0 comments on commit ac1e0f2

Please sign in to comment.