feat: introduce FileSystem.unjar once parameter

`specs2-html` currently copies all of its HTML resources (41 files) for each specification executed with HTML output. In the process it reads and traverses `specs2-html.jar` four times and repeatedly overwrites the files it has already copied. This is inefficient and puts unnecessary strain on the disk. The newly introduced `once` parameter lets `specs2-html` unjar its resources only once for each combination of jar, target location and filter expression.
etorreborre · Jun 9, 2024 · 3da7b60 · 3da7b60
1 parent f5376e2
commit 3da7b60
Show file tree

Hide file tree

Showing 2 changed files with 49 additions and 28 deletions.
diff --git a/common/shared/src/main/scala/org/specs2/io/FileSystem.scala b/common/shared/src/main/scala/org/specs2/io/FileSystem.scala
@@ -58,6 +58,21 @@ case class FileSystem(logger: Logger) extends FilePathReader:
   def mkdirs(path: FilePath): Operation[Unit] =
     mkdirs(path.dir)
 
+  /** Bounded record of the (jar, destination, filter) combinations which were already unjared.
+    *
+    * Entries are keyed by the external form of the jar URL rather than by the URL itself because
+    * `java.net.URL.equals` and `hashCode` may resolve host names, which would block on the network while the lock
+    * is held. Every lookup refreshes the timestamp of its entry; once the cache grows beyond `maxSize` the entries
+    * with the oldest timestamps are evicted.
+    */
+  private object UnjarLRUCache:
+    private var lastAccess: Map[(String, DirectoryPath, String), Long] = Map.empty
+    private val maxSize = 1000
+
+    /** Checks if the given parameters were already processed; if not immediately adds them to the cache. */
+    def alreadyUnjared(params: (URL, DirectoryPath, String)): Boolean =
+      val (jarUrl, dest, regexFilter) = params
+      // build the key outside of the lock, it only depends on the arguments
+      val key = (jarUrl.toExternalForm, dest, regexFilter)
+      UnjarLRUCache.synchronized:
+        val known = lastAccess.contains(key)
+        // refresh the timestamp on every access so that recently used entries survive the eviction
+        lastAccess += key -> System.nanoTime
+        // the size can only grow when a new key was added
+        if !known then clean()
+        known
+
+    /** Clean up LRU entries until cache is at most max size. */
+    private def clean(): Unit =
+      while lastAccess.size > maxSize do lastAccess -= lastAccess.minBy(_._2)._1
+
   /** Unjar the jar (or zip file) specified by "path" to the "dest" directory. Filters files which shouldn't be
     * extracted with a regular expression.
     * @param jarUrl
@@ -67,34 +82,40 @@ case class FileSystem(logger: Logger) extends FilePathReader:
     * @param regexFilter
     *   regular expression filtering files which shouldn't be extracted; the expression must capture the path of an
     *   entry as group 1 which will then be used relative to dirPath as target path for that entry
+    * @param once
+    *   whether the filtered content of the jar should only be extracted once to the destination; can be used when it is
+    *   known that the content will not change; defaults to false for backwards compatibility
     */
-  def unjar(jarUrl: URL, dest: DirectoryPath, regexFilter: String): Operation[Unit] =
-    val regex = compile(regexFilter)
-    val uis = jarUrl.openStream()
-    val zis = new ZipInputStream(new BufferedInputStream(uis))
-
-    @annotation.tailrec
-    def extractEntry(entry: ZipEntry): Unit =
-      if entry != null then
-        val matcher = regex.matcher(entry.getName)
-        if matcher.matches then
-          val target = matcher.replaceFirst(s"${quoteReplacement(dest.path)}$$1")
-          if !entry.isDirectory then
-            new File(target).getParentFile.mkdirs
-            new File(target).createNewFile
-            val fos = new FileOutputStream(target)
-            val dest = new BufferedOutputStream(fos, 2048)
-            try
-              copy(zis, dest)
-              dest.flush
-            finally dest.close
-
-        extractEntry(zis.getNextEntry)
-
-    Operation.delayed {
-      try extractEntry(zis.getNextEntry)
-      finally zis.close
-    }
+  def unjar(jarUrl: URL, dest: DirectoryPath, regexFilter: String, once: Boolean = false): Operation[Unit] =
+    // Everything, including the `once` bookkeeping and the opening of the jar, happens inside the delayed
+    // operation: building the operation has no side effect, so an operation which is never run neither leaks
+    // an open stream nor marks the resources as already extracted
+    Operation.delayed {
+      val toSkip = once && UnjarLRUCache.alreadyUnjared((jarUrl, dest, regexFilter))
+      if !toSkip then
+        val regex = compile(regexFilter)
+        val zis = new ZipInputStream(new BufferedInputStream(jarUrl.openStream()))
+
+        // walk the entries of the archive one by one, `null` signals the end of the stream
+        @annotation.tailrec
+        def extractEntry(entry: ZipEntry): Unit =
+          if entry != null then
+            val matcher = regex.matcher(entry.getName)
+            if matcher.matches then
+              // group 1 of the filter is the path of the entry relative to the destination directory
+              val target = matcher.replaceFirst(s"${quoteReplacement(dest.path)}$$1")
+              if !entry.isDirectory then
+                new File(target).getParentFile.mkdirs
+                new File(target).createNewFile
+                val out = new BufferedOutputStream(new FileOutputStream(target), 2048)
+                try
+                  copy(zis, out)
+                  out.flush
+                finally out.close
+
+            extractEntry(zis.getNextEntry)
+
+        try extractEntry(zis.getNextEntry)
+        finally zis.close
+    }
 
   /** Copy an input stream to an output stream.
     * @param input

diff --git a/html/src/main/scala/org/specs2/reporter/HtmlPrinter.scala b/html/src/main/scala/org/specs2/reporter/HtmlPrinter.scala
@@ -187,7 +187,7 @@ case class HtmlPrinter(env: Env, searchPage: SearchPage, logger: Logger = Consol
       case Some(url) =>
         val fs = env.fileSystem
         if url.getProtocol.equalsIgnoreCase("jar") then
-          fs.unjar(jarOf(url), outputDir, s"^${quote(base.path)}(/${quote(src.path)}/.*)$$")
+          fs.unjar(jarOf(url), outputDir, s"^${quote(base.path)}(/${quote(src.path)}/.*)$$", once = true)
         else fs.copyDir(DirectoryPath.unsafe(url.toURI), outputDir / src)
       case _ =>
         val message = s"no resource found for path ${(base / src).path}"