Skip to content

Commit

Permalink
feat: introduce FileSystem.unjar once parameter
Browse files Browse the repository at this point in the history
`specs2-html` currently copies all of its html resources (41 files) for each specification (executed with html output), reading and traversing the `specs2-html.jar` four times in the process and overwriting its own previously copied files over and over. This is inefficient and puts unnecessary strain on the disk. The newly introduced `once` parameter is used by `specs2-html` to unjar its resources only once for each combination of target location and filter criteria.
  • Loading branch information
NTPape committed Jun 9, 2024
1 parent f5376e2 commit 3da7b60
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 28 deletions.
75 changes: 48 additions & 27 deletions common/shared/src/main/scala/org/specs2/io/FileSystem.scala
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,21 @@ case class FileSystem(logger: Logger) extends FilePathReader:
def mkdirs(path: FilePath): Operation[Unit] =
  // a file path has no directory of its own to create: hand its containing directory (`path.dir`)
  // over to the `mkdirs` overload which accepts that directory
  val containingDir = path.dir
  mkdirs(containingDir)

/** Bounded memo of the `unjar` requests which were already seen.
  *
  * Each request is remembered together with the `System.nanoTime` of its last lookup; once more than `maxSize`
  * requests are stored, the ones with the oldest timestamp are dropped first.
  */
private object UnjarLRUCache:
  // The jar URL is stored by its external (string) form rather than as a java.net.URL: URL.equals / URL.hashCode
  // may perform blocking host name resolution and treat distinct hosts sharing an IP address as equal, which
  // makes URL a poor hash key.
  private var lastSeen: Map[(String, DirectoryPath, String), Long] = Map.empty
  private val maxSize = 1000

  /** Checks if the given parameters were already processed; if not immediately adds them to the cache.
    *
    * A lookup always refreshes the timestamp of the entry, whether it was already present or not.
    *
    * @param params
    *   jar url, destination directory and regular expression filter of an unjar request
    * @return
    *   true if the same parameters were seen before (and not evicted since)
    */
  def alreadyUnjared(params: (URL, DirectoryPath, String)): Boolean =
    val (jarUrl, dest, regexFilter) = params
    val key = (jarUrl.toExternalForm, dest, regexFilter)
    // check-and-register must be atomic so that concurrent callers cannot both get `false` for the same key
    UnjarLRUCache.synchronized:
      val known = lastSeen.contains(key)
      lastSeen += key -> System.nanoTime
      // the size can only have grown when the key is new
      if !known then clean()
      known

  /** Clean up LRU entries until cache is at most max size. */
  private def clean(): Unit =
    while lastSeen.size > maxSize do lastSeen -= lastSeen.minBy(_._2)._1

/** Unjar the jar (or zip file) located at "jarUrl" to the "dest" directory. Only the entries whose name matches a
* regular expression are extracted.
* @param jarUrl
Expand All @@ -67,34 +82,40 @@ case class FileSystem(logger: Logger) extends FilePathReader:
* @param regexFilter
* regular expression selecting the entries to extract (entries which don't match are skipped); the expression must
* capture the path of an entry as group 1 which will then be used relative to dest as target path for that entry
* @param once
* whether the filtered content of the jar should only be extracted once to the destination; can be used when it is
* known that the content will not change; defaults to false for backwards compatibility
*/
def unjar(jarUrl: URL, dest: DirectoryPath, regexFilter: String): Operation[Unit] =
  // Nothing happens until the returned Operation is run: the regular expression is compiled and the jar stream
  // is opened inside Operation.delayed. Building (and then discarding) the Operation therefore cannot leak an
  // open stream, and running the Operation again re-opens the jar instead of reading from a closed stream.
  Operation.delayed {
    val regex = compile(regexFilter)
    val zis = new ZipInputStream(new BufferedInputStream(jarUrl.openStream()))
    try
      // getNextEntry returns null once the end of the archive is reached
      Iterator
        .continually(zis.getNextEntry)
        .takeWhile(_ != null)
        .foreach { entry =>
          val matcher = regex.matcher(entry.getName)
          // only entries matching the filter are extracted; directory entries are never written as files
          if matcher.matches && !entry.isDirectory then
            // group 1 of the filter, appended to the destination path, is the target of the entry
            // NOTE(review): entry names are not checked for ".." segments - confirm that only trusted jars are used
            val target = new File(matcher.replaceFirst(s"${quoteReplacement(dest.path)}$$1"))
            target.getParentFile.mkdirs
            target.createNewFile
            val out = new BufferedOutputStream(new FileOutputStream(target), 2048)
            try
              // zis is positioned on the current entry: this copies the content of that entry only
              copy(zis, out)
              out.flush
            finally out.close
        }
    finally zis.close
  }
def unjar(jarUrl: URL, dest: DirectoryPath, regexFilter: String, once: Boolean = false): Operation[Unit] =
  // All the work is deferred until the returned Operation is run, including the `once` bookkeeping and the
  // opening of the jar stream. Building the Operation opens nothing, so nothing is leaked when it is discarded,
  // and a discarded Operation does not mark the content as already extracted.
  Operation.delayed {
    // alreadyUnjared registers the request as a side effect, so it is only consulted when `once` is requested
    val toSkip = once && UnjarLRUCache.alreadyUnjared((jarUrl, dest, regexFilter))
    if !toSkip then
      val regex = compile(regexFilter)
      val zis = new ZipInputStream(new BufferedInputStream(jarUrl.openStream()))

      // walk the entries of the archive one by one; getNextEntry returns null at the end of the archive
      @annotation.tailrec
      def extractEntry(entry: ZipEntry): Unit =
        if entry != null then
          val matcher = regex.matcher(entry.getName)
          // only entries matching the filter are extracted
          if matcher.matches then
            // group 1 of the filter, appended to the destination path, is the target of the entry
            // NOTE(review): entry names are not checked for ".." segments - confirm that only trusted jars are used
            val target = new File(matcher.replaceFirst(s"${quoteReplacement(dest.path)}$$1"))
            if !entry.isDirectory then
              target.getParentFile.mkdirs
              target.createNewFile
              val out = new BufferedOutputStream(new FileOutputStream(target), 2048)
              try
                // zis is positioned on the current entry: this copies the content of that entry only
                copy(zis, out)
                out.flush
              finally out.close

          extractEntry(zis.getNextEntry)

      try extractEntry(zis.getNextEntry)
      finally zis.close
  }

/** Copy an input stream to an output stream.
* @param input
Expand Down
2 changes: 1 addition & 1 deletion html/src/main/scala/org/specs2/reporter/HtmlPrinter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ case class HtmlPrinter(env: Env, searchPage: SearchPage, logger: Logger = Consol
case Some(url) =>
val fs = env.fileSystem
if url.getProtocol.equalsIgnoreCase("jar") then
fs.unjar(jarOf(url), outputDir, s"^${quote(base.path)}(/${quote(src.path)}/.*)$$")
fs.unjar(jarOf(url), outputDir, s"^${quote(base.path)}(/${quote(src.path)}/.*)$$", once = true)
else fs.copyDir(DirectoryPath.unsafe(url.toURI), outputDir / src)
case _ =>
val message = s"no resource found for path ${(base / src).path}"
Expand Down

0 comments on commit 3da7b60

Please sign in to comment.