Skip to content

Commit

Permalink
feature: extract a more general LruCache
Browse files Browse the repository at this point in the history
  • Loading branch information
etorreborre committed Jun 12, 2024
1 parent fb8ad68 commit f92040e
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 17 deletions.
40 changes: 40 additions & 0 deletions common/shared/src/main/scala/org/specs2/data/LruCache.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package org.specs2.data

import org.specs2.fp.*
import org.specs2.control.*
import org.specs2.time.*

/** LRU (least recently used) cache for processing items.
 *
 *  Values can be registered, and the cache is cleaned on every registration so that it never holds more than
 *  `maxSize` elements. The elements with the oldest timestamps are removed first.
 *
 *  Every access to the internal state (reads included) goes through the lock of this instance, so that a thread
 *  reading the size or the oldest timestamp sees the result of registrations made by other threads.
 *
 *  @param maxSize
 *    the maximum number of elements kept in the cache
 *  @param systemTime
 *    the clock used to timestamp the registered values
 */
class LruCache[A](maxSize: Int, systemTime: SystemTime = JavaSystemTime):
  private var values: Map[A, Long] = Map.empty

  /** Checks if a value has already been processed; if not immediately adds it to the cache. If it has been processed,
   *  refresh its timestamp.
   *  @return
   *    the processed status
   */
  def register(value: A): Operation[ProcessedStatus] =
    Operation.delayed {
      this.synchronized {
        val alreadyProcessed = values.contains(value)
        // refresh the timestamp even if the value was already registered
        values += value -> systemTime.nanoTime
        val status = if alreadyProcessed then ProcessedStatus.AlreadyProcessed else ProcessedStatus.ToProcess
        // evict the oldest entries until the size bound holds; the nonEmpty guard keeps
        // the loop total when maxSize is negative (minBy fails on an empty map)
        while values.nonEmpty && values.size > maxSize do values -= values.minBy(_._2)._1
        status
      }
    }

  /** Return the number of elements in the cache */
  def size: Int =
    this.synchronized(values.size)

  /** Return the timestamp for the oldest element
   *  @throws java.lang.UnsupportedOperationException
   *    if the cache is empty
   */
  def oldestTimestamp: Long =
    this.synchronized(values.minBy(_._2)._2)

/** Outcome of registering an item in the LruCache:
 *    - `AlreadyProcessed`: the item was present in the cache when it was registered
 *    - `ToProcess`: the item was not in the cache when it was registered
 */
enum ProcessedStatus:
  case AlreadyProcessed, ToProcess
24 changes: 7 additions & 17 deletions common/shared/src/main/scala/org/specs2/io/FileSystem.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package io

import control.*
import fp.syntax.*
import data.LruCache
import data.ProcessedStatus.*
import java.io.*
import java.util.regex.Pattern.*
import java.util.regex.Matcher.*
Expand Down Expand Up @@ -59,20 +61,7 @@ case class FileSystem(logger: Logger) extends FilePathReader:
mkdirs(path.dir)

/** Unjaring the same thing over and over is inefficient. LRU cache to keep track of what was already done. */
private object UnjarLRUCache:
  private val maxSize = 1000
  // timestamp of the last request for each argument triple
  private var timestamps: Map[(URL, DirectoryPath, String), Long] = Map.empty

  /** Tells whether the given parameters were seen before. The parameters are always (re-)stamped with the current
   *  time, so unseen parameters are added to the cache right away.
   */
  def alreadyUnjared(params: (URL, DirectoryPath, String)): Boolean =
    UnjarLRUCache.synchronized {
      val seenBefore = timestamps.contains(params)
      timestamps = timestamps.updated(params, System.nanoTime)
      // the cache can only have grown if the parameters were new
      if !seenBefore then evictOldest()
      seenBefore
    }

  /** Drop the entries with the smallest timestamps until the cache holds at most `maxSize` entries. */
  private def evictOldest(): Unit =
    while timestamps.size > maxSize do
      val (oldestParams, _) = timestamps.minBy(_._2)
      timestamps = timestamps - oldestParams
// LRU cache keyed by the (jar URL, destination directory, regex filter) triple, holding at most 1000 entries
private val UnjarLRUCache = new LruCache[(URL, DirectoryPath, String)](maxSize = 1000)

/** Unjar the jar (or zip file) specified by "path" to the "dest" directory. Filters files which shouldn't be
* extracted with a regular expression. This is only done once per argument list (unless eventually evicted from LRU
Expand All @@ -84,13 +73,14 @@ case class FileSystem(logger: Logger) extends FilePathReader:
* @param regexFilter
* regular expression filtering files which shouldn't be extracted; the expression must capture the path of an
* entry as group 1 which will then be used relative to dirPath as target path for that entry
*
* @see
* [[unjar]]
*/
def unjarOnce(jarUrl: URL, dest: DirectoryPath, regexFilter: String): Operation[Unit] =
if UnjarLRUCache.alreadyUnjared((jarUrl, dest, regexFilter)) then Operation.ok(())
else unjar(jarUrl, dest, regexFilter)
for
status <- UnjarLRUCache.register((jarUrl, dest, regexFilter))
_ <- unjar(jarUrl, dest, regexFilter).when(status == ToProcess)
yield ()

/** Unjar the jar (or zip file) specified by "path" to the "dest" directory. Filters files which shouldn't be
* extracted with a regular expression.
Expand Down
9 changes: 9 additions & 0 deletions common/shared/src/main/scala/org/specs2/time/SystemTime.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package org.specs2.time

/** Abstraction over a clock, so that the source of time can be swapped for another implementation. */
trait SystemTime:
  /** @return
   *    the current reading of the clock
   */
  def nanoTime: Long

/** [[SystemTime]] whose readings come from `System.nanoTime()` */
object JavaSystemTime extends SystemTime:
  override def nanoTime: Long = System.nanoTime()
57 changes: 57 additions & 0 deletions tests/shared/src/test/scala/org/specs2/data/LruCacheSpec.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package org.specs2
package data

import org.scalacheck.*
import org.scalacheck.Arbitrary.*
import org.specs2.time.*
import org.specs2.fp.syntax.*
import ProcessedStatus.*

/** Specification for the LruCache: registration status, size bound and eviction order. */
class LruCacheSpec extends Specification with ScalaCheck:
  def is = s2"""

A LRU cache can be used to store elements and evict them when they have been unused for a long time
A status is returned to know if an element has already been seen before $e1
The cache can not contain more than a fixed number of elements $e2
The oldest elements are always evicted first $e3

"""

  /** Registering the same value a second time reports it as already processed */
  def e1 =
    val cache = LruCache[Int](maxSize = 3, systemTime = MockSystemTime())
    val operations = cache.register(1) >> cache.register(2) >> cache.register(1)
    val status = operations.unsafeRun
    status === AlreadyProcessed

  /** Whatever the number of registered values, the cache never holds more than maxSize elements */
  def e2 = prop { (n: SmallInt) =>
    val cache = LruCache[Int](maxSize = 3, systemTime = MockSystemTime())
    val operations = (1 to n.value).toList.traverse(i => cache.register(i))
    operations.void.unsafeRun
    cache.size must be_<=(3)
  }.set(minTestsOk = 10)

  /** After registering n distinct values only the most recent ones remain in the cache */
  def e3 = prop { (n: SmallInt) =>
    val mockSystemTime = MockSystemTime()
    val cache = LruCache[Int](maxSize = 3, systemTime = mockSystemTime)
    val operations = (1 to n.value).toList.traverse(i => cache.register(i))
    operations.void.unsafeRun
    // the mock clock stamps the i-th registration with i: once the cache is full only the
    // last 3 stamps (n - 2, n - 1, n) can remain, otherwise the very first stamp is still there
    cache.oldestTimestamp === math.max(1L, n.value - 2L)
  }.set(minTestsOk = 10)

/** HELPERS */
/** Deterministic clock for tests: successive calls to `nanoTime` return 1, 2, 3, ... */
class MockSystemTime() extends SystemTime:
  private val times: Iterator[Long] = Iterator.from(1).map(_.toLong)

  def nanoTime: Long =
    times.next()

/** Wrapper for an `Int`, so that a dedicated ScalaCheck generator can be attached to it */
case class SmallInt(value: Int)

object SmallInt:
  /** Generator producing values between 1 and 10 (inclusive) out of arbitrary integers */
  given Arbitrary[SmallInt] =
    Arbitrary(arbitrary[Int].map(fromArbitraryInt))

  /** Map any integer to the 1..10 range: the remainder is in -9..9, its absolute value in 0..9 */
  private def fromArbitraryInt(n: Int): SmallInt =
    SmallInt(math.abs(n % 10) + 1)

0 comments on commit f92040e

Please sign in to comment.