filecoin-station · pyropy · Dec 23, 2024 · Dec 23, 2024 · Dec 23, 2024 · Dec 24, 2024
diff --git a/lib/committee.js b/lib/committee.js
@@ -21,10 +21,10 @@ export class Committee {
   #measurements
 
   /**
-   * @param {Pick<Measurement, 'cid' | 'minerId'>} retrievalTask
+   * @param {Pick<Measurement, 'cid' | 'minerId' | 'roundId'>} retrievalTask
    */
-  constructor ({ cid, minerId }) {
-    this.retrievalTask = { minerId, cid }
+  constructor ({ cid, minerId, roundId }) {
+    this.retrievalTask = { minerId, cid, roundId }
 
     this.#measurements = []
 
@@ -48,6 +48,7 @@ export class Committee {
   addMeasurement (m) {
     assert.strictEqual(m.cid, this.retrievalTask.cid, 'cid must match')
     assert.strictEqual(m.minerId, this.retrievalTask.minerId, 'minerId must match')
+    assert.strictEqual(m.roundId, this.retrievalTask.roundId, 'roundId must match')
     assert.strictEqual(m.fraudAssessment, 'OK', 'only accepted measurements can be added')
     this.#measurements.push(m)
   }

diff --git a/lib/evaluate.js b/lib/evaluate.js
@@ -286,6 +286,8 @@ export const runFraudDetection = async ({
     // sanity checks to get nicer errors if we forget to set required fields in unit tests
     assert(typeof m.inet_group === 'string', 'missing inet_group')
     assert(typeof m.finished_at === 'number', 'missing finished_at')
+    assert(typeof m.start_at === 'number', 'missing start_at')
+    assert(typeof m.first_byte_at === 'number', 'missing first_byte_at')
 
     const isValidTask = sparkRoundDetails.retrievalTasks.some(
       t => t.cid === m.cid && t.minerId === m.minerId

diff --git a/lib/preprocess.js b/lib/preprocess.js
@@ -11,10 +11,11 @@ const debug = createDebug('spark:preprocess')
 
 export class Measurement {
   /**
+   * @param {Partial<import('./round.js').RoundData>} r
    * @param {Partial<import('./typings.js').RawMeasurement>} m
    * @param {<T extends string>(str: T) => T} pointerize
    */
-  constructor (m, pointerize = (v) => v) {
+  constructor (r, m, pointerize = (v) => v) {
     this.participantAddress = pointerize(parseParticipantAddress(m.participant_address))
     this.retrievalResult = pointerize(getRetrievalResult(m))
     this.cid = pointerize(m.cid)
@@ -38,6 +39,7 @@ export class Measurement {
     this.stationId = pointerize(m.station_id)
     this.carChecksum = pointerize(m.car_checksum)
     this.carTooLarge = m.car_too_large
+    this.roundId = pointerize(r.index.toString())
   }
 }
 
@@ -76,7 +78,7 @@ export const preprocess = async ({
     // eslint-disable-next-line camelcase
     .map(measurement => {
       try {
-        return new Measurement(measurement, round.pointerize)
+        return new Measurement(round, measurement, round.pointerize)
       } catch (err) {
         logger.error('Invalid measurement:', err.message, measurement)
         return null

diff --git a/lib/public-stats.js b/lib/public-stats.js
@@ -2,7 +2,7 @@
 import createDebug from 'debug'
 import * as providerRetrievalResultStats from './provider-retrieval-result-stats.js'
 import { updatePlatformStats } from './platform-stats.js'
-import { getTaskId } from './retrieval-stats.js'
+import { getTaskId, getValueAtPercentile } from './retrieval-stats.js'
 
 /** @import pg from 'pg' */
 /** @import { Committee } from './committee.js' */
@@ -27,6 +27,7 @@
     await updateIndexerQueryStats(pgClient, committees)
     await updateDailyDealsStats(pgClient, committees, findDealClients)
     await updatePlatformStats(pgClient, allMeasurements)
+    await updateRetrievalTimings(pgClient, committees)
   } finally {
     await pgClient.end()
   }
@@ -225,3 +226,48 @@
     flatStats.map(stat => stat.retrievable)
   ])
 }
+
+/**
+ * Insert the median time-to-first-byte (TTFB) of every committee whose
+ * retrieval task was evaluated as retrievable, dated with the database `now()`.
+ *
+ * Committees without an evaluation, without a retrieval majority, or with a
+ * majority result other than 'OK' are skipped.
+ *
+ * @param {pg.Client} pgClient
+ * @param {Iterable<Committee>} committees
+ */
+const updateRetrievalTimings = async (pgClient, committees) => {
+  /** @type {Array<{minerId: string; taskId: string; timeToFirstByteP50: number}>} */
+  const stats = []
+  for (const c of committees) {
+    if (!c.evaluation || !c.evaluation.hasRetrievalMajority || c.evaluation.retrievalResult !== 'OK') continue
+    const { minerId } = c.retrievalTask
+    const taskId = getTaskId(c.retrievalTask)
+    const ttfbMeasurements = []
+    for (const m of c.measurements) {
+      if (m.fraudAssessment !== 'OK') continue
+      ttfbMeasurements.push(m.first_byte_at - m.start_at)
+    }
+    // Nothing to aggregate: a missing median cannot be stored in the NOT NULL column
+    if (ttfbMeasurements.length === 0) continue
+
+    // NOTE(review): sorted ascending in case getValueAtPercentile expects ordered input - confirm
+    ttfbMeasurements.sort((a, b) => a - b)
+    // NOTE(review): rounded because the value is bound as int[]; the percentile may be fractional - confirm
+    const timeToFirstByteP50 = Math.round(getValueAtPercentile(ttfbMeasurements, 0.5))
+    stats.push({ minerId, taskId, timeToFirstByteP50 })
+  }
+
+  // A conflict should never happen, but in case it does we keep the existing value
+  await pgClient.query(`
+    INSERT INTO retrieval_times
+    (day, miner_id, task_id, time_to_first_byte_p50) VALUES
+    (now(), unnest($1::text[]), unnest($2::text[]), unnest($3::int[]))
+    ON CONFLICT(day, miner_id, task_id) DO NOTHING
+  `, [
+    stats.map(stat => stat.minerId),
+    stats.map(stat => stat.taskId),
+    stats.map(stat => stat.timeToFirstByteP50)
+  ])
+}
diff --git a/lib/retrieval-stats.js b/lib/retrieval-stats.js
@@ -164,10 +164,10 @@ const addHistogramToPoint = (point, values, fieldNamePrefix = '') => {
 }
 
 /**
+ * Build the identifier of a retrieval task by joining `cid`, `minerId` and
+ * `roundId` with `::`.
+ *
- * @param {Pick<Measurement, 'cid' | 'minerId'>} m
+ * @param {Pick<Measurement, 'cid' | 'minerId' | 'roundId'>} m
  * @returns {string}
  */
-export const getTaskId = (m) => `${m.cid}::${m.minerId}`
+export const getTaskId = (m) => `${m.cid}::${m.minerId}::${m.roundId}`
 
 /**
  * @param {Measurement[]} measurements

diff --git a/migrations/021.do.add-retrieval-times.sql b/migrations/021.do.add-retrieval-times.sql
@@ -0,0 +1,10 @@
+-- One row per (day, miner, retrieval task) holding the median time to first byte.
+-- NOTE(review): renaming time_to_first_byte_p50 to ttfb_p50 was suggested; the long name is
+-- kept because the INSERT in lib/public-stats.js and the tests still use it.
+CREATE TABLE retrieval_times (
+  day DATE NOT NULL,
+  miner_id TEXT NOT NULL,
+  task_id TEXT NOT NULL,
+  time_to_first_byte_p50 INT NOT NULL,
+  PRIMARY KEY (day, miner_id, task_id)
+);
diff --git a/test/helpers/test-data.js b/test/helpers/test-data.js
@@ -11,7 +11,8 @@ export const ROUND_DETAILS = 'bafybeie5rekb2jox77ow64wjjd2bjdsp6d3yeivhzzd234hnb
 export const VALID_TASK = {
   cid: 'QmUuEoBdjC8D1PfWZCc7JCSK8nj7TV6HbXWDHYHzZHCVGS',
   minerId: 'f1test',
-  clients: ['f1client']
+  clients: ['f1client'],
+  roundId: '0'
 }
 Object.freeze(VALID_TASK)
 
@@ -40,7 +41,8 @@ export const VALID_MEASUREMENT = {
   carTooLarge: false,
   retrievalResult: 'OK',
   indexerResult: 'OK',
-  fraudAssessment: null
+  fraudAssessment: null,
+  roundId: '0'
 }
 
 // Fraud detection is mutating the measurements parsed from JSON

diff --git a/test/preprocess.js b/test/preprocess.js
@@ -41,16 +41,18 @@ describe('preprocess', () => {
     await preprocess({ round, cid, roundIndex, fetchMeasurements, recordTelemetry, logger })
 
     assert.deepStrictEqual(round.measurements, [
-      new Measurement({
-        participant_address: '0x999999cf1046e68e36E1aA2E0E07105eDDD1f08E',
-        station_id: VALID_STATION_ID,
-        spark_version: '1.2.3',
-        inet_group: 'ig1',
-        finished_at: '2023-11-01T09:00:00.000Z',
-        first_byte_at: '2023-11-01T09:00:01.000Z',
-        start_at: '2023-11-01T09:00:02.000Z',
-        end_at: '2023-11-01T09:00:03.000Z'
-      })
+      new Measurement(
+        round,
+        {
+          participant_address: '0x999999cf1046e68e36E1aA2E0E07105eDDD1f08E',
+          station_id: VALID_STATION_ID,
+          spark_version: '1.2.3',
+          inet_group: 'ig1',
+          finished_at: '2023-11-01T09:00:00.000Z',
+          first_byte_at: '2023-11-01T09:00:01.000Z',
+          start_at: '2023-11-01T09:00:02.000Z',
+          end_at: '2023-11-01T09:00:03.000Z'
+        })
     ])
     assert.deepStrictEqual(getCalls, [cid])
     assert.deepStrictEqual(round.measurementBatches, [cid])

diff --git a/test/public-stats.test.js b/test/public-stats.test.js
@@ -28,6 +28,7 @@ describe('public-stats', () => {
     await pgClient.query('DELETE FROM retrieval_stats')
     await pgClient.query('DELETE FROM indexer_query_stats')
     await pgClient.query('DELETE FROM daily_deals')
+    await pgClient.query('DELETE FROM retrieval_times')
 
     // Run all tests inside a transaction to ensure `now()` always returns the same value
     // See https://dba.stackexchange.com/a/63549/125312
@@ -534,8 +535,88 @@ describe('public-stats', () => {
     })
   })
 
+  describe('retrieval_times', () => {
+    it('creates or updates the row for today', async () => {
+      /** @type {Measurement[]} */
+      const honestMeasurements = [
+        givenTimeToFirstByte({ ...VALID_MEASUREMENT, cid: 'cidone', minerId: 'f1first', retrievalResult: 'OK' }, 1000),
+        givenTimeToFirstByte({ ...VALID_MEASUREMENT, cid: 'cidone', minerId: 'f1first', retrievalResult: 'OK' }, 2000),
+        givenTimeToFirstByte({ ...VALID_MEASUREMENT, cid: 'cidone', minerId: 'f1first', retrievalResult: 'OK' }, 3000)
+      ]
+
+      /** @type {Measurement[]} */
+      const dishonestMeasurements = [
+        givenTimeToFirstByte({ ...VALID_MEASUREMENT, cid: 'cidone', minerId: 'f1first', retrievalResult: 'UNKNOWN_ERROR' }, 100),
+        givenTimeToFirstByte({ ...VALID_MEASUREMENT, cid: 'cidone', minerId: 'f1first', retrievalResult: 'UNKNOWN_ERROR' }, 200),
+        givenTimeToFirstByte({ ...VALID_MEASUREMENT, cid: 'cidone', minerId: 'f1first', retrievalResult: 'UNKNOWN_ERROR' }, 300)
+      ]
+
+      let allMeasurements = [...honestMeasurements, ...dishonestMeasurements]
+      let committees = buildEvaluatedCommitteesFromMeasurements(honestMeasurements)
+
+      await updatePublicStats({
+        createPgClient,
+        committees,
+        honestMeasurements,
+        allMeasurements,
+        findDealClients: (_minerId, _cid) => ['f0client']
+      })
+      const { rows: created } = await pgClient.query(
+        'SELECT day::TEXT, miner_id, task_id, time_to_first_byte_p50 FROM retrieval_times'
+      )
+      assert.deepStrictEqual(created, [
+        { day: today, miner_id: 'f1first', task_id: 'cidone::f1first::0', time_to_first_byte_p50: 2000 }
+      ])
+      // Second run for the same task and day, this time with a different median (1000)
+      /** @type {Measurement[]} */
+      const newHonestMeasurements = [
+        givenTimeToFirstByte({ ...VALID_MEASUREMENT, cid: 'cidone', minerId: 'f1first', retrievalResult: 'OK' }, 1000),
+        givenTimeToFirstByte({ ...VALID_MEASUREMENT, cid: 'cidone', minerId: 'f1first', retrievalResult: 'OK' }, 1000),
+        givenTimeToFirstByte({ ...VALID_MEASUREMENT, cid: 'cidone', minerId: 'f1first', retrievalResult: 'OK' }, 1000)
+      ]
+
+      /** @type {Measurement[]} */
+      const newDishonestMeasurements = [
+        givenTimeToFirstByte({ ...VALID_MEASUREMENT, cid: 'cidone', minerId: 'f1first', retrievalResult: 'UNKNOWN_ERROR' }, 10_000),
+        givenTimeToFirstByte({ ...VALID_MEASUREMENT, cid: 'cidone', minerId: 'f1first', retrievalResult: 'UNKNOWN_ERROR' }, 20_000),
+        givenTimeToFirstByte({ ...VALID_MEASUREMENT, cid: 'cidone', minerId: 'f1first', retrievalResult: 'UNKNOWN_ERROR' }, 30_000)
+      ]
+
+      allMeasurements = [...newHonestMeasurements, ...newDishonestMeasurements]
+      committees = buildEvaluatedCommitteesFromMeasurements(newHonestMeasurements)
+
+      await updatePublicStats({
+        createPgClient,
+        committees,
+        honestMeasurements: newHonestMeasurements,
+        allMeasurements,
+        findDealClients: (_minerId, _cid) => ['f0client']
+      })
+
+      const { rows: updated } = await pgClient.query(
+        'SELECT day::TEXT, miner_id, task_id, time_to_first_byte_p50 FROM retrieval_times'
+      )
+
+      // on conflict, we ignore new values: the median of the first run (2000) must be kept
+      assert.deepStrictEqual(updated, [
+        { day: today, miner_id: 'f1first', task_id: 'cidone::f1first::0', time_to_first_byte_p50: 2000 }
+      ])
+    })
+  })
+
   const getCurrentDate = async () => {
     const { rows: [{ today }] } = await pgClient.query('SELECT now()::DATE::TEXT as today')
     return today
   }
+
+  /**
+   * Return a copy of the measurement with `first_byte_at` set to `start_at + timeToFirstByte`.
+   *
+   * @param {Measurement} measurement
+   * @param {number} timeToFirstByte  Time in milliseconds
+   * @returns {Measurement}
+   */
+  function givenTimeToFirstByte (measurement, timeToFirstByte) {
+    return { ...measurement, first_byte_at: measurement.start_at + timeToFirstByte }
+  }
 })