From 911e8b03d6c52126279c9610c4ede72059cc8367 Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Mon, 5 Jun 2023 21:20:56 -0700 Subject: [PATCH 01/18] wip --- .../texera/web/service/JobResultService.scala | 5 +-- .../common/workflow/LogicalPlan.scala | 42 ++++++++++++++++-- .../workflow/SinkInjectionTransformer.scala | 44 +++++++++++++++++++ .../common/workflow/WorkflowCompiler.scala | 19 ++------ .../common/workflow/WorkflowRewriter.scala | 4 +- .../sink/managed/ProgressiveSinkOpExec.scala | 4 +- 6 files changed, 90 insertions(+), 28 deletions(-) create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala index 957b79282bb..6545ca55f2d 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala @@ -184,11 +184,8 @@ class JobResultService( newState.operatorInfo.foreach { case (opId, info) => val oldInfo = oldState.operatorInfo.getOrElse(opId, new OperatorResultMetadata()) - // TODO: frontend now receives snapshots instead of deltas, we can optimize this - // if (oldInfo.tupleCount != info.tupleCount) { buf(opId) = progressiveResults(opId).convertWebResultUpdate(oldInfo.tupleCount, info.tupleCount) - //} } Iterable(WebResultUpdateEvent(buf.toMap)) }) @@ -217,7 +214,7 @@ class JobResultService( }) // For cached operators and sinks, create result service so that the results can be displayed. - logicalPlan.getSinkOperators.map(sink => { + logicalPlan.getTerminalOperators.map(sink => { logicalPlan.getOperator(sink) match { case sinkOp: ProgressiveSinkOpDesc => val service = new ProgressiveResultService(sinkOp) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala index df192af57bf..9cc15835c15 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala @@ -60,10 +60,9 @@ case class LogicalPlan( lazy val sourceOperators: List[String] = operatorMap.keys.filter(op => jgraphtDag.inDegreeOf(op) == 0).toList - lazy val sinkOperators: List[String] = + lazy val terminalOperators: List[String] = operatorMap.keys - .filter(op => operatorMap(op).isInstanceOf[SinkOpDesc]) - .toList + .filter(op => jgraphtDag.outDegreeOf(op) == 0).toList lazy val (inputSchemaMap, errorList) = propagateWorkflowSchema() @@ -83,7 +82,7 @@ case class LogicalPlan( def getSourceOperators: List[String] = this.sourceOperators - def getSinkOperators: List[String] = this.sinkOperators + def getTerminalOperators: List[String] = this.terminalOperators def getUpstream(operatorID: String): List[OperatorDescriptor] = { val upstream = new mutable.MutableList[OperatorDescriptor] @@ -93,6 +92,41 @@ case class LogicalPlan( upstream.toList } + def addOperator(operatorDescriptor: OperatorDescriptor): LogicalPlan = { + this.copy(operators :+ operatorDescriptor, links, breakpoints, cachedOperatorIds) + } + + def removeOperator(operatorId: String): LogicalPlan = { + this.copy( + operators.filter(o => o.operatorID == operatorId), + links, + breakpoints, cachedOperatorIds) + } + + // returns a new physical plan with the edges added + def 
addEdge( + from: String, + to: String, + fromPort: Int = 0, + toPort: Int = 0 + ): LogicalPlan = { + val newLink = OperatorLink(OperatorPort(from, fromPort), OperatorPort(to, toPort)) + val newLinks = links :+ newLink + this.copy(operators, newLinks, breakpoints, cachedOperatorIds) + } + + // returns a new physical plan with the edges removed + def removeEdge( + from: String, + to: String, + fromPort: Int = 0, + toPort: Int = 0 + ): LogicalPlan = { + val linkToRemove = OperatorLink(OperatorPort(from, fromPort), OperatorPort(to, toPort)) + val newLinks = links.filter(l => l != linkToRemove) + this.copy(operators, newLinks, breakpoints, cachedOperatorIds) + } + def getDownstream(operatorID: String): List[OperatorDescriptor] = { val downstream = new mutable.MutableList[OperatorDescriptor] jgraphtDag diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala new file mode 100644 index 00000000000..6da6b888caa --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala @@ -0,0 +1,44 @@ +package edu.uci.ics.texera.workflow.common.workflow + +import edu.uci.ics.texera.workflow.operators.sink.SinkOpDesc +import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc +import edu.uci.ics.texera.workflow.operators.visualization.VisualizationOperator + +object SinkInjectionTransformer { + + def transform(logicalPlan: LogicalPlan): LogicalPlan = { + var resultPlan = logicalPlan + + logicalPlan.getTerminalOperators.foreach(terminalOpId => { + val terminalOp = logicalPlan.getOperator(terminalOpId) + if (! terminalOp.isInstanceOf[SinkOpDesc]) { + terminalOp.operatorInfo.outputPorts.indices.foreach(out => { + val sink = new ProgressiveSinkOpDesc() + resultPlan = resultPlan + .addOperator(sink) + .addEdge(terminalOp.operatorID, sink.operatorID, out, 0) + }) + } + }) + + // pre-process: set output mode for sink based on the visualization operator before it + logicalPlan.getTerminalOperators.foreach(sinkOpId => { + val sinkOp = logicalPlan.getOperator(sinkOpId) + val upstream = logicalPlan.getUpstream(sinkOpId) + if (upstream.nonEmpty) { + (upstream.head, sinkOp) match { + // match the combination of a visualization operator followed by a sink operator + case (viz: VisualizationOperator, sink: ProgressiveSinkOpDesc) => + sink.setOutputMode(viz.outputMode()) + sink.setChartType(viz.chartType()) + case _ => + //skip + } + } + }) + + } + + + +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala index f445e70713b..fca4ddfc93e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala @@ -35,23 +35,10 @@ class WorkflowCompiler(val logicalPlan: LogicalPlan, val context: WorkflowContex .filter(o => o._2.nonEmpty) def amberWorkflow(workflowId: WorkflowIdentity, opResultStorage: OpResultStorage): Workflow = { - // pre-process: set output mode for sink based on the visualization operator before it - logicalPlan.getSinkOperators.foreach(sinkOpId => { - val sinkOp = logicalPlan.getOperator(sinkOpId) - val upstream = logicalPlan.getUpstream(sinkOpId) - if (upstream.nonEmpty) { - (upstream.head, 
sinkOp) match { - // match the combination of a visualization operator followed by a sink operator - case (viz: VisualizationOperator, sink: ProgressiveSinkOpDesc) => - sink.setOutputMode(viz.outputMode()) - sink.setChartType(viz.chartType()) - case _ => - //skip - } - } - }) + // logical plan transformation: add a sink operator for terminal operators without a sink + val logicalPlan0 = SinkInjectionTransformer.transform(logicalPlan) - val physicalPlan0 = logicalPlan.toPhysicalPlan(this.context, opResultStorage) + val physicalPlan0 = logicalPlan0.toPhysicalPlan(this.context, opResultStorage) // create pipelined regions. val physicalPlan1 = new WorkflowPipelinedRegionsBuilder( diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriter.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriter.scala index bbe44a0a7c2..4855e36577c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriter.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriter.scala @@ -90,7 +90,7 @@ class WorkflowRewriter( logger.info("Rewriting workflow {}", logicalPlan) checkCacheValidity() - workflowDAG.getSinkOperators.foreach(addCacheSourceOpIdQue.+=) + workflowDAG.getTerminalOperators.foreach(addCacheSourceOpIdQue.+=) // Topological traverse and add cache source operators. while (addCacheSourceOpIdQue.nonEmpty) { @@ -115,7 +115,7 @@ class WorkflowRewriter( addCacheSourceNewLinks.toList, addCacheSourceNewBreakpoints.toList ) - addCacheSourceLogicalPlan.getSinkOperators.foreach(addCacheSourceOpIdQue.+=) + addCacheSourceLogicalPlan.getTerminalOperators.foreach(addCacheSourceOpIdQue.+=) // Topological traverse and add cache sink operators. 
val addCacheSinkOpIdIter = addCacheSourceLogicalPlan.jgraphtDag.iterator() diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sink/managed/ProgressiveSinkOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sink/managed/ProgressiveSinkOpExec.scala index f973016beb3..c7551fe936d 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sink/managed/ProgressiveSinkOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sink/managed/ProgressiveSinkOpExec.scala @@ -1,13 +1,13 @@ package edu.uci.ics.texera.workflow.operators.sink.managed import edu.uci.ics.amber.engine.common.tuple.ITuple -import edu.uci.ics.amber.engine.common.virtualidentity.LinkIdentity +import edu.uci.ics.amber.engine.common.virtualidentity.{LayerIdentity, LinkIdentity} import edu.uci.ics.amber.engine.common.{ISinkOperatorExecutor, InputExhausted} import edu.uci.ics.texera.workflow.common.IncrementalOutputMode._ import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.tuple.schema.OperatorSchemaInfo import edu.uci.ics.texera.workflow.common.{IncrementalOutputMode, ProgressiveUtils} -import edu.uci.ics.texera.workflow.operators.sink.storage.{SinkStorageWriter, SinkStorageReader} +import edu.uci.ics.texera.workflow.operators.sink.storage.{SinkStorageReader, SinkStorageWriter} import scala.collection.mutable From d2eecc575770c36c842f7f492345ed574ceb592d Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Tue, 27 Jun 2023 16:56:58 -0700 Subject: [PATCH 02/18] support workflows without a view result operator --- .../texera/web/service/JobResultService.scala | 9 ++- .../web/service/WorkflowJobService.scala | 58 +++++++++---------- .../common/workflow/LogicalPlan.scala | 25 ++++---- .../workflow/SinkInjectionTransformer.scala | 33 +++++++---- .../common/workflow/WorkflowCompiler.scala | 13 +++-- .../common/workflow/WorkflowRewriter.scala | 2 +- .../sink/managed/ProgressiveSinkOpDesc.java | 30 ++++++---- .../validation/validation-workflow.service.ts | 28 ++------- 8 files changed, 104 insertions(+), 94 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala index 6545ca55f2d..a2f7ade73ed 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala @@ -213,15 +213,14 @@ class JobResultService( } }) - // For cached operators and sinks, create result service so that the results can be displayed. + // For operators connected to a sink and sinks, + // create result service so that the results can be displayed. 
logicalPlan.getTerminalOperators.map(sink => { logicalPlan.getOperator(sink) match { case sinkOp: ProgressiveSinkOpDesc => val service = new ProgressiveResultService(sinkOp) - sinkOp.getCachedUpstreamId match { - case Some(upstreamId) => progressiveResults += ((upstreamId, service)) - case None => progressiveResults += ((sink, service)) - } + progressiveResults += ((sinkOp.getUpstreamId.get, service)) + progressiveResults += ((sink, service)) case other => // skip other non-texera-managed sinks, if any } }) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala index 507a43e8843..7af128e782a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala @@ -31,8 +31,8 @@ class WorkflowJobService( with LazyLogging { val stateStore = new JobStateStore() - val logicalPlan: LogicalPlan = createLogicalPlan() - val workflowCompiler: WorkflowCompiler = createWorkflowCompiler(logicalPlan) +// val logicalPlan: LogicalPlan = createLogicalPlan() + val workflowCompiler: WorkflowCompiler = createWorkflowCompiler(LogicalPlan(request.logicalPlan)) val workflow: Workflow = workflowCompiler.amberWorkflow( WorkflowIdentity(workflowContext.jobId), resultService.opResultStorage @@ -40,7 +40,7 @@ class WorkflowJobService( private val controllerConfig = { val conf = ControllerConfig.default if ( - logicalPlan.operators.exists { + workflowCompiler.finalLogicalPlan.operators.exists { case x: DualInputPortsPythonUDFOpDescV2 => true case x: PythonUDFOpDescV2 => true case x: PythonUDFSourceOpDescV2 => true @@ -77,13 +77,13 @@ class WorkflowJobService( workflowContext.executionID = -1 // for every new execution, // reset it so that the value doesn't carry over across executions def startWorkflow(): Unit = { - for (pair <- logicalPlan.breakpoints) { + for (pair <- workflowCompiler.finalLogicalPlan.breakpoints) { Await.result( jobBreakpointService.addBreakpoint(pair.operatorID, pair.breakpoint), Duration.fromSeconds(10) ) } - resultService.attachToJob(stateStore, logicalPlan, client) + resultService.attachToJob(stateStore, workflowCompiler.finalLogicalPlan, client) if (WorkflowService.userSystemEnabled) { workflowContext.executionID = ExecutionsMetadataPersistService.insertNewExecution( workflowContext.wId, @@ -101,30 +101,30 @@ class WorkflowJobService( ) } - private[this] def createLogicalPlan(): LogicalPlan = { - var logicalPlan = LogicalPlan(request.logicalPlan) - if (WorkflowCacheService.isAvailable) { - logger.debug( - s"Cached operators: ${operatorCache.cachedOperators} with ${logicalPlan.cachedOperatorIds}" - ) - val workflowRewriter = new WorkflowRewriter( - logicalPlan, - operatorCache.cachedOperators, - operatorCache.cacheSourceOperators, - operatorCache.cacheSinkOperators, - operatorCache.operatorRecord, - resultService.opResultStorage - ) - val newWorkflowInfo = workflowRewriter.rewrite - val oldWorkflowInfo = logicalPlan - logicalPlan = newWorkflowInfo - logicalPlan.cachedOperatorIds = oldWorkflowInfo.cachedOperatorIds - logger.info( - s"Rewrite the original workflow: $oldWorkflowInfo to be: $logicalPlan" - ) - } - logicalPlan - } +// private[this] def createLogicalPlan(): LogicalPlan = { +// var logicalPlan = LogicalPlan(request.logicalPlan) +// if (WorkflowCacheService.isAvailable) { +//// logger.debug( +//// s"Cached operators: ${operatorCache.cachedOperators} 
with ${logicalPlan.cachedOperatorIds}" +//// ) +//// val workflowRewriter = new WorkflowRewriter( +//// logicalPlan, +//// operatorCache.cachedOperators, +//// operatorCache.cacheSourceOperators, +//// operatorCache.cacheSinkOperators, +//// operatorCache.operatorRecord, +//// resultService.opResultStorage +//// ) +//// val newWorkflowInfo = workflowRewriter.rewrite +//// val oldWorkflowInfo = logicalPlan +//// logicalPlan = newWorkflowInfo +//// logicalPlan.cachedOperatorIds = oldWorkflowInfo.cachedOperatorIds +//// logger.info( +//// s"Rewrite the original workflow: $oldWorkflowInfo to be: $logicalPlan" +//// ) +// } +// logicalPlan +// } private[this] def createWorkflowCompiler( logicalPlan: LogicalPlan diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala index 9cc15835c15..a53e988689b 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala @@ -62,7 +62,8 @@ case class LogicalPlan( lazy val terminalOperators: List[String] = operatorMap.keys - .filter(op => jgraphtDag.outDegreeOf(op) == 0).toList + .filter(op => jgraphtDag.outDegreeOf(op) == 0) + .toList lazy val (inputSchemaMap, errorList) = propagateWorkflowSchema() @@ -100,15 +101,17 @@ case class LogicalPlan( this.copy( operators.filter(o => o.operatorID == operatorId), links, - breakpoints, cachedOperatorIds) + breakpoints, + cachedOperatorIds + ) } // returns a new physical plan with the edges added def addEdge( - from: String, - to: String, - fromPort: Int = 0, - toPort: Int = 0 + from: String, + to: String, + fromPort: Int = 0, + toPort: Int = 0 ): LogicalPlan = { val newLink = OperatorLink(OperatorPort(from, fromPort), OperatorPort(to, toPort)) val newLinks = links :+ newLink @@ -117,10 +120,10 @@ case class LogicalPlan( // returns a new physical plan with the edges removed def removeEdge( - from: String, - to: String, - fromPort: Int = 0, - toPort: Int = 0 + from: String, + to: String, + fromPort: Int = 0, + toPort: Int = 0 ): LogicalPlan = { val linkToRemove = OperatorLink(OperatorPort(from, fromPort), OperatorPort(to, toPort)) val newLinks = links.filter(l => l != linkToRemove) @@ -232,7 +235,7 @@ case class LogicalPlan( // assign storage to texera-managed sinks before generating exec config operators.foreach { case o @ (sink: ProgressiveSinkOpDesc) => - val storageKey = sink.getCachedUpstreamId.getOrElse(o.operatorID) + val storageKey = sink.getUpstreamId.getOrElse(o.operatorID) // due to the size limit of single document in mongoDB (16MB) // for sinks visualizing HTMLs which could possibly be large in size, we always use the memory storage. 
val storageType = diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala index 6da6b888caa..eb64788d214 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala @@ -1,19 +1,22 @@ package edu.uci.ics.texera.workflow.common.workflow +import edu.uci.ics.texera.workflow.common.WorkflowContext import edu.uci.ics.texera.workflow.operators.sink.SinkOpDesc import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc import edu.uci.ics.texera.workflow.operators.visualization.VisualizationOperator object SinkInjectionTransformer { - def transform(logicalPlan: LogicalPlan): LogicalPlan = { + def transform(logicalPlan: LogicalPlan, context: WorkflowContext): LogicalPlan = { var resultPlan = logicalPlan + // for any terminal operator without a sink, add a sink logicalPlan.getTerminalOperators.foreach(terminalOpId => { val terminalOp = logicalPlan.getOperator(terminalOpId) - if (! terminalOp.isInstanceOf[SinkOpDesc]) { + if (!terminalOp.isInstanceOf[SinkOpDesc]) { terminalOp.operatorInfo.outputPorts.indices.foreach(out => { val sink = new ProgressiveSinkOpDesc() + sink.setContext(context) resultPlan = resultPlan .addOperator(sink) .addEdge(terminalOp.operatorID, sink.operatorID, out, 0) @@ -21,12 +24,23 @@ object SinkInjectionTransformer { } }) - // pre-process: set output mode for sink based on the visualization operator before it - logicalPlan.getTerminalOperators.foreach(sinkOpId => { - val sinkOp = logicalPlan.getOperator(sinkOpId) - val upstream = logicalPlan.getUpstream(sinkOpId) - if (upstream.nonEmpty) { - (upstream.head, sinkOp) match { + // for each sink: + // set the corresponding upstream ID and port + // set output mode based on the visualization operator before it + // precondition: all the terminal operators are sinks + resultPlan.getTerminalOperators.foreach(sinkOpId => { + val sinkOp = resultPlan.getOperator(sinkOpId).asInstanceOf[ProgressiveSinkOpDesc] + val upstream = resultPlan.getUpstream(sinkOpId).headOption + val edge = resultPlan.links.find(l => + l.origin.operatorID == upstream.map(_.operatorID).orNull + && l.destination.operatorID == sinkOpId + ) + if (upstream.nonEmpty && edge.nonEmpty) { + // set upstream ID and port + sinkOp.setUpstreamId(upstream.get.operatorID) + sinkOp.setUpstreamPort(edge.get.origin.portOrdinal) + // set output mode for visualization operator + (upstream.get, sinkOp) match { // match the combination of a visualization operator followed by a sink operator case (viz: VisualizationOperator, sink: ProgressiveSinkOpDesc) => sink.setOutputMode(viz.outputMode()) @@ -37,8 +51,7 @@ object SinkInjectionTransformer { } }) + resultPlan } - - } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala index fca4ddfc93e..6dd5829987e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala @@ -6,8 +6,6 @@ import edu.uci.ics.amber.engine.common.virtualidentity.WorkflowIdentity import edu.uci.ics.texera.workflow.common.operators.OperatorDescriptor 
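// A minimal sketch (not part of this patch) of the intended call order after this change:
// the compiler now exposes finalLogicalPlan, which lazily applies SinkInjectionTransformer
// before physical-plan generation. `context`, `request`, `jobId`, and `opResultStorage`
// are assumed to be in scope and are illustrative only.
val compiler = new WorkflowCompiler(LogicalPlan(request.logicalPlan), context)
val planWithSinks = compiler.finalLogicalPlan // sink injection happens lazily here
val workflow = compiler.amberWorkflow(WorkflowIdentity(jobId), opResultStorage)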
import edu.uci.ics.texera.workflow.common.storage.OpResultStorage import edu.uci.ics.texera.workflow.common.{ConstraintViolation, WorkflowContext} -import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc -import edu.uci.ics.texera.workflow.operators.visualization.VisualizationOperator object WorkflowCompiler { @@ -25,6 +23,8 @@ object WorkflowCompiler { class WorkflowCompiler(val logicalPlan: LogicalPlan, val context: WorkflowContext) { logicalPlan.operatorMap.values.foreach(initOperator) + lazy val finalLogicalPlan = transformLogicalPlan(logicalPlan) + def initOperator(operator: OperatorDescriptor): Unit = { operator.setContext(context) } @@ -34,11 +34,14 @@ class WorkflowCompiler(val logicalPlan: LogicalPlan, val context: WorkflowContex .map(o => (o._1, o._2.validate().toSet)) .filter(o => o._2.nonEmpty) - def amberWorkflow(workflowId: WorkflowIdentity, opResultStorage: OpResultStorage): Workflow = { + def transformLogicalPlan(originalPlan: LogicalPlan): LogicalPlan = { + // logical plan transformation: add a sink operator for terminal operators without a sink - val logicalPlan0 = SinkInjectionTransformer.transform(logicalPlan) + SinkInjectionTransformer.transform(originalPlan, context) + } - val physicalPlan0 = logicalPlan0.toPhysicalPlan(this.context, opResultStorage) + def amberWorkflow(workflowId: WorkflowIdentity, opResultStorage: OpResultStorage): Workflow = { + val physicalPlan0 = finalLogicalPlan.toPhysicalPlan(this.context, opResultStorage) // create pipelined regions. val physicalPlan1 = new WorkflowPipelinedRegionsBuilder( diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriter.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriter.scala index 5327c3a94b7..0f305284d31 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriter.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriter.scala @@ -391,7 +391,7 @@ class WorkflowRewriter( cachedOperatorDescriptors.toString() ) val cacheSinkOperator = new ProgressiveSinkOpDesc() - cacheSinkOperator.setCachedUpstreamId(operatorDescriptor.operatorID) +// cacheSinkOperator.setCachedUpstreamId(operatorDescriptor.operatorID) cacheSinkOperatorDescriptors += ((operatorDescriptor.operatorID, cacheSinkOperator)) val cacheSourceOperator = new CacheSourceOpDesc(operatorDescriptor.operatorID, opResultStorage) cacheSourceOperatorDescriptors += ((operatorDescriptor.operatorID, cacheSourceOperator)) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sink/managed/ProgressiveSinkOpDesc.java b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sink/managed/ProgressiveSinkOpDesc.java index 6ad5e6af81d..ddc3c585981 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sink/managed/ProgressiveSinkOpDesc.java +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sink/managed/ProgressiveSinkOpDesc.java @@ -4,7 +4,6 @@ import com.google.common.base.Preconditions; import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecConfig; import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecFunc; -import edu.uci.ics.amber.engine.common.IOperatorExecutor; import edu.uci.ics.texera.workflow.common.IncrementalOutputMode; import edu.uci.ics.texera.workflow.common.ProgressiveUtils; import edu.uci.ics.texera.workflow.common.metadata.InputPort; @@ -16,7 +15,6 @@ import 
edu.uci.ics.texera.workflow.operators.sink.storage.SinkStorageReader; import scala.Option; import scala.collection.immutable.List; -import scala.reflect.ClassTag; import java.io.Serializable; @@ -38,8 +36,12 @@ public class ProgressiveSinkOpDesc extends SinkOpDesc { @JsonIgnore private SinkStorageReader storage = null; + // corresponding upstream operator ID and output port, will be set by workflow compiler @JsonIgnore - private Option cachedUpstreamId = Option.empty(); + private Option upstreamId = Option.empty(); + + @JsonIgnore + private Option upstreamPort = Option.empty(); @Override public OpExecConfig operatorExecutor(OperatorSchemaInfo operatorSchemaInfo) { @@ -101,17 +103,25 @@ public void setChartType(String chartType) { } @JsonIgnore - public void setStorage(SinkStorageReader storage){ this.storage = storage; } + public void setStorage(SinkStorageReader storage) { this.storage = storage; } @JsonIgnore - public SinkStorageReader getStorage(){ return this.storage; } + public SinkStorageReader getStorage() { return this.storage; } - @JsonIgnore - public Option getCachedUpstreamId(){ return this.cachedUpstreamId;} + public Option getUpstreamId() { + return upstreamId; + } - @JsonIgnore - public void setCachedUpstreamId(String id){ - this.cachedUpstreamId = Option.apply(id); + public void setUpstreamId(String upstreamId) { + this.upstreamId = Option.apply(upstreamId); + } + + public Option getUpstreamPort() { + return upstreamPort; + } + + public void setUpstreamPort(Integer upstreamPort) { + this.upstreamPort = Option.apply(upstreamPort); } diff --git a/core/new-gui/src/app/workspace/service/validation/validation-workflow.service.ts b/core/new-gui/src/app/workspace/service/validation/validation-workflow.service.ts index 48598b72482..c94ee86b562 100644 --- a/core/new-gui/src/app/workspace/service/validation/validation-workflow.service.ts +++ b/core/new-gui/src/app/workspace/service/validation/validation-workflow.service.ts @@ -68,6 +68,7 @@ export class ValidationWorkflowService { this.operatorSchemaList = metadata.operators; this.initializeValidation(); }); + this.getWorkflowValidationErrorStream().subscribe(c => console.log(c)); } public getCurrentWorkflowValidationError(): { @@ -240,8 +241,9 @@ export class ValidationWorkflowService { } /** - * This method is used to check whether all ports of the operator have been connected. - * if all ports of the operator are connected, the operator is valid. + * This method is used to check whether all input ports of the operator have been connected. + * if all input ports of the operator are connected, the operator is valid. + * All output ports without a connection will be implicitly treated as a final result operator. */ private validateOperatorConnection(operatorID: string): Validation { const operator = this.workflowActionService.getTexeraGraph().getOperator(operatorID); @@ -284,33 +286,13 @@ export class ValidationWorkflowService { } } - // check if output links satisfy the requirement - const requiredOutputNum = operator.outputPorts.length; - const actualOutputNum = texeraGraph - .getOutputLinksByOperatorId(operatorID) - .filter(link => texeraGraph.isLinkEnabled(link.linkID)).length; - - // If the operator is the sink operator, the actual output number must be equal to required number. - const satisyOutput = - this.operatorMetadataService.getOperatorSchema(operator.operatorType).additionalMetadata.operatorGroupName === - "View Results" - ? 
requiredOutputNum === actualOutputNum - : requiredOutputNum <= actualOutputNum; - - const outputPortsViolationMessage = satisyOutput - ? "" - : `requires ${requiredOutputNum} outputs, has ${actualOutputNum} outputs`; - - if (satisfyInput && satisyOutput) { + if (satisfyInput) { return { isValid: true }; } else { const messages: Record = {}; if (!satisfyInput) { messages[ValidationWorkflowService.VALIDATION_OPERATOR_INPUT_MESSAGE] = inputPortsViolationMessage; } - if (!satisyOutput) { - messages[ValidationWorkflowService.VALIDATION_OPERATOR_OUTPUT_MESSAGE] = outputPortsViolationMessage; - } return { isValid: false, messages: messages }; } } From 7b6097876b9e9d9ba5e3b33707ab692a19ee0ca3 Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Tue, 27 Jun 2023 17:11:19 -0700 Subject: [PATCH 03/18] update --- .../web/service/WorkflowJobService.scala | 29 +------------------ .../metadata/OperatorGroupConstants.scala | 4 +-- .../sink/managed/ProgressiveSinkOpDesc.java | 2 +- 3 files changed, 3 insertions(+), 32 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala index 7af128e782a..04b07a62623 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala @@ -12,7 +12,7 @@ import edu.uci.ics.texera.web.workflowruntimestate.WorkflowAggregatedState.{READ import edu.uci.ics.texera.web.{SubscriptionManager, TexeraWebApplication, WebsocketInput} import edu.uci.ics.texera.workflow.common.WorkflowContext import edu.uci.ics.texera.workflow.common.workflow.WorkflowCompiler.ConstraintViolationException -import edu.uci.ics.texera.workflow.common.workflow.{LogicalPlan, WorkflowCompiler, WorkflowRewriter} +import edu.uci.ics.texera.workflow.common.workflow.{LogicalPlan, WorkflowCompiler} import edu.uci.ics.texera.workflow.operators.udf.python.source.PythonUDFSourceOpDescV2 import edu.uci.ics.texera.workflow.operators.udf.python.{ DualInputPortsPythonUDFOpDescV2, @@ -22,7 +22,6 @@ import edu.uci.ics.texera.workflow.operators.udf.python.{ class WorkflowJobService( workflowContext: WorkflowContext, wsInput: WebsocketInput, - operatorCache: WorkflowCacheService, resultService: JobResultService, request: WorkflowExecuteRequest, errorHandler: Throwable => Unit, @@ -31,7 +30,6 @@ class WorkflowJobService( with LazyLogging { val stateStore = new JobStateStore() -// val logicalPlan: LogicalPlan = createLogicalPlan() val workflowCompiler: WorkflowCompiler = createWorkflowCompiler(LogicalPlan(request.logicalPlan)) val workflow: Workflow = workflowCompiler.amberWorkflow( WorkflowIdentity(workflowContext.jobId), @@ -101,31 +99,6 @@ class WorkflowJobService( ) } -// private[this] def createLogicalPlan(): LogicalPlan = { -// var logicalPlan = LogicalPlan(request.logicalPlan) -// if (WorkflowCacheService.isAvailable) { -//// logger.debug( -//// s"Cached operators: ${operatorCache.cachedOperators} with ${logicalPlan.cachedOperatorIds}" -//// ) -//// val workflowRewriter = new WorkflowRewriter( -//// logicalPlan, -//// operatorCache.cachedOperators, -//// operatorCache.cacheSourceOperators, -//// operatorCache.cacheSinkOperators, -//// operatorCache.operatorRecord, -//// resultService.opResultStorage -//// ) -//// val newWorkflowInfo = workflowRewriter.rewrite -//// val oldWorkflowInfo = logicalPlan -//// logicalPlan = newWorkflowInfo -//// logicalPlan.cachedOperatorIds = 
oldWorkflowInfo.cachedOperatorIds -//// logger.info( -//// s"Rewrite the original workflow: $oldWorkflowInfo to be: $logicalPlan" -//// ) -// } -// logicalPlan -// } - private[this] def createWorkflowCompiler( logicalPlan: LogicalPlan ): WorkflowCompiler = { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/metadata/OperatorGroupConstants.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/metadata/OperatorGroupConstants.scala index 2ad0dab04cd..5490ade9430 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/metadata/OperatorGroupConstants.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/metadata/OperatorGroupConstants.scala @@ -8,7 +8,6 @@ object OperatorGroupConstants { final val UTILITY_GROUP = "Utilities" final val UDF_GROUP = "User-defined Functions" final val VISUALIZATION_GROUP = "Visualization" - final val RESULT_GROUP = "View Results" /** * The order of the groups to show up in the frontend operator panel. @@ -21,8 +20,7 @@ object OperatorGroupConstants { GroupInfo(JOIN_GROUP, 3), GroupInfo(UTILITY_GROUP, 4), GroupInfo(UDF_GROUP, 5), - GroupInfo(VISUALIZATION_GROUP, 6), - GroupInfo(RESULT_GROUP, 7) + GroupInfo(VISUALIZATION_GROUP, 6) ) } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sink/managed/ProgressiveSinkOpDesc.java b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sink/managed/ProgressiveSinkOpDesc.java index ddc3c585981..8f52cf6f70b 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sink/managed/ProgressiveSinkOpDesc.java +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sink/managed/ProgressiveSinkOpDesc.java @@ -56,7 +56,7 @@ public OperatorInfo operatorInfo() { return new OperatorInfo( "View Results", "View the edu.uci.ics.texera.workflow results", - OperatorGroupConstants.RESULT_GROUP(), + OperatorGroupConstants.UTILITY_GROUP(), asScalaBuffer(singletonList(new InputPort("", false))).toList(), List.empty(), false, false, false, false); } From e5e89e97d7250b6e747ba7d0434013dc321758e1 Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Tue, 27 Jun 2023 17:12:29 -0700 Subject: [PATCH 04/18] update --- .../workspace/service/validation/validation-workflow.service.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/core/new-gui/src/app/workspace/service/validation/validation-workflow.service.ts b/core/new-gui/src/app/workspace/service/validation/validation-workflow.service.ts index c94ee86b562..280f6a91c4e 100644 --- a/core/new-gui/src/app/workspace/service/validation/validation-workflow.service.ts +++ b/core/new-gui/src/app/workspace/service/validation/validation-workflow.service.ts @@ -243,7 +243,6 @@ export class ValidationWorkflowService { /** * This method is used to check whether all input ports of the operator have been connected. * if all input ports of the operator are connected, the operator is valid. - * All output ports without a connection will be implicitly treated as a final result operator. 
*/ private validateOperatorConnection(operatorID: string): Validation { const operator = this.workflowActionService.getTexeraGraph().getOperator(operatorID); From c70878714913b09b6a0624c26367b3d427adf08a Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Thu, 6 Jul 2023 10:00:56 -0700 Subject: [PATCH 05/18] wip --- .../uci/ics/texera/workflowcachestate.proto | 20 -- .../uci/ics/texera/workflowresultstate.proto | 22 -- .../event/WebResultUpdateEvent.scala | 2 +- .../event/WorkflowAvailableResultEvent.scala | 2 +- .../service/ProgressiveResultService.scala | 30 +-- .../web/service/WorkflowCacheChecker.scala | 72 +++++ .../web/service/WorkflowCacheService.scala | 106 ++++---- .../web/service/WorkflowJobService.scala | 2 +- ...vice.scala => WorkflowResultService.scala} | 27 +- .../texera/web/service/WorkflowService.scala | 19 +- .../web/storage/WorkflowResultStore.scala | 7 + .../web/storage/WorkflowStateStore.scala | 6 +- .../common/storage/OpResultStorage.scala | 5 + .../common/workflow/LogicalPlan.scala | 34 ++- .../workflow/SinkInjectionTransformer.scala | 40 ++- .../workflow/WorkflowCacheRewriter.scala | 50 ++++ .../source/cache/CacheSourceOpDesc.scala | 2 +- .../web/workflowcachestate/CacheState.scala | 44 ---- .../WorkflowCacheStore.scala | 249 ------------------ .../WorkflowcachestateProto.scala | 37 --- .../OperatorResultMetadata.scala | 135 ---------- .../WorkflowResultStore.scala | 248 ----------------- .../WorkflowresultstateProto.scala | 40 --- 23 files changed, 275 insertions(+), 924 deletions(-) delete mode 100644 core/amber/src/main/protobuf/edu/uci/ics/texera/workflowcachestate.proto delete mode 100644 core/amber/src/main/protobuf/edu/uci/ics/texera/workflowresultstate.proto create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheChecker.scala rename core/amber/src/main/scala/edu/uci/ics/texera/web/service/{JobResultService.scala => WorkflowResultService.scala} (92%) create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/web/storage/WorkflowResultStore.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCacheRewriter.scala delete mode 100644 core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowcachestate/CacheState.scala delete mode 100644 core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowcachestate/WorkflowCacheStore.scala delete mode 100644 core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowcachestate/WorkflowcachestateProto.scala delete mode 100644 core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowresultstate/OperatorResultMetadata.scala delete mode 100644 core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowresultstate/WorkflowResultStore.scala delete mode 100644 core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowresultstate/WorkflowresultstateProto.scala diff --git a/core/amber/src/main/protobuf/edu/uci/ics/texera/workflowcachestate.proto b/core/amber/src/main/protobuf/edu/uci/ics/texera/workflowcachestate.proto deleted file mode 100644 index d30c9536ccd..00000000000 --- a/core/amber/src/main/protobuf/edu/uci/ics/texera/workflowcachestate.proto +++ /dev/null @@ -1,20 +0,0 @@ -syntax = "proto3"; - -package edu.uci.ics.texera.web; - -import "scalapb/scalapb.proto"; - -option (scalapb.options) = { - scope: FILE, - preserve_unknown_fields: false - no_default_values_in_constructor: false -}; - -enum CacheState{ - VALID = 0; - INVALID = 1; -} - -message WorkflowCacheStore{ - map operator_info = 1; -} \ No newline at end of file diff --git 
a/core/amber/src/main/protobuf/edu/uci/ics/texera/workflowresultstate.proto b/core/amber/src/main/protobuf/edu/uci/ics/texera/workflowresultstate.proto deleted file mode 100644 index d8800bed878..00000000000 --- a/core/amber/src/main/protobuf/edu/uci/ics/texera/workflowresultstate.proto +++ /dev/null @@ -1,22 +0,0 @@ -syntax = "proto3"; - -package edu.uci.ics.texera.web; - -import "scalapb/scalapb.proto"; - -option (scalapb.options) = { - scope: FILE, - preserve_unknown_fields: false - no_default_values_in_constructor: false -}; - - -message WorkflowResultStore{ - map operator_info = 1; -} - - -message OperatorResultMetadata{ - int32 tuple_count = 1; - string change_detector = 2; -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WebResultUpdateEvent.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WebResultUpdateEvent.scala index d30390978b0..68ee72b2a93 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WebResultUpdateEvent.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WebResultUpdateEvent.scala @@ -1,5 +1,5 @@ package edu.uci.ics.texera.web.model.websocket.event -import edu.uci.ics.texera.web.service.JobResultService.WebResultUpdate +import edu.uci.ics.texera.web.service.WorkflowResultService.WebResultUpdate case class WebResultUpdateEvent(updates: Map[String, WebResultUpdate]) extends TexeraWebSocketEvent diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WorkflowAvailableResultEvent.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WorkflowAvailableResultEvent.scala index d57bbe46403..91b7f11e0d0 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WorkflowAvailableResultEvent.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WorkflowAvailableResultEvent.scala @@ -1,7 +1,7 @@ package edu.uci.ics.texera.web.model.websocket.event import edu.uci.ics.texera.web.model.websocket.event.WorkflowAvailableResultEvent.OperatorAvailableResult -import edu.uci.ics.texera.web.service.JobResultService.WebOutputMode +import edu.uci.ics.texera.web.service.WorkflowResultService.WebOutputMode object WorkflowAvailableResultEvent { case class OperatorAvailableResult( diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ProgressiveResultService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ProgressiveResultService.scala index a95e3b1314b..7bb91576e1a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ProgressiveResultService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ProgressiveResultService.scala @@ -1,6 +1,6 @@ package edu.uci.ics.texera.web.service -import edu.uci.ics.texera.web.service.JobResultService._ +import edu.uci.ics.texera.web.service.WorkflowResultService._ import edu.uci.ics.texera.workflow.common.IncrementalOutputMode.{SET_DELTA, SET_SNAPSHOT} import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc @@ -12,34 +12,30 @@ class ProgressiveResultService( val sink: ProgressiveSinkOpDesc ) { - // derive the web output mode from the sink operator type - val webOutputMode: WebOutputMode = { - (sink.getOutputMode, sink.getChartType) match { - // visualization sinks use its corresponding mode - case (SET_SNAPSHOT, Some(_)) => SetSnapshotMode() - case (SET_DELTA, Some(_)) => SetDeltaMode() - // Non-visualization sinks use pagination mode - case 
(_, None) => PaginationMode() - } - } - /** - * All execution result tuples for this operator to this moment. * For SET_SNAPSHOT output mode: result is the latest snapshot * FOR SET_DELTA output mode: * - for insert-only delta: effectively the same as latest snapshot * - for insert-retract delta: the union of all delta outputs, not compacted to a snapshot - */ - - /** + * * Produces the WebResultUpdate to send to frontend from a result update from the engine. */ def convertWebResultUpdate(oldTupleCount: Int, newTupleCount: Int): WebResultUpdate = { + val webOutputMode: WebOutputMode = { + (sink.getOutputMode, sink.getChartType) match { + // visualization sinks use its corresponding mode + case (SET_SNAPSHOT, Some(_)) => SetSnapshotMode() + case (SET_DELTA, Some(_)) => SetDeltaMode() + // Non-visualization sinks use pagination mode + case (_, None) => PaginationMode() + } + } + val storage = sink.getStorage val webUpdate = (webOutputMode, sink.getOutputMode) match { case (PaginationMode(), SET_SNAPSHOT) => val numTuples = storage.getCount - val maxPageIndex = Math.ceil(numTuples / JobResultService.defaultPageSize.toDouble).toInt + val maxPageIndex = Math.ceil(numTuples / WorkflowResultService.defaultPageSize.toDouble).toInt WebPaginationUpdate( PaginationMode(), newTupleCount, diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheChecker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheChecker.scala new file mode 100644 index 00000000000..90608ae033f --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheChecker.scala @@ -0,0 +1,72 @@ +package edu.uci.ics.texera.web.service + +import edu.uci.ics.texera.workflow.common.workflow.LogicalPlan + +import scala.collection.mutable + +class WorkflowCacheChecker(oldWorkflow: LogicalPlan, newWorkflow: LogicalPlan) { + + val equivalenceClass = new mutable.HashMap[String, Int]() + var nextClassId: Int = 0 + + def getNextClassId(): Int = { + nextClassId += 1 + nextClassId + } + + // checks the validity of the cache given the old plan and the new plan + // returns a map: + def checkCacheValidity(): Map[String, String] = { + + // for each operator in the old workflow, add it to its own equivalence class + oldWorkflow.jgraphtDag.iterator().forEachRemaining(opId => { + val oldId = "old-" + opId + equivalenceClass.put(oldId, nextClassId) + nextClassId += 1 + }) + + // for each operator in the new workflow + // check if + // 1: an operator with the same content can be found in the old workflow, and + // 2: the input operators are also in the same equivalence class + // + // if both conditions are met, then the two operators are equal, + // else a new equivalence class is created + newWorkflow.jgraphtDag.iterator().forEachRemaining(opId => { + val newOp = newWorkflow.getOperator(opId) + val newOpUpstreamClasses = newWorkflow.getUpstream(opId) + .map(op => equivalenceClass("new-" + op.operatorID)) + val oldOp = oldWorkflow.operators.find(op => op.equals(newOp)).orNull + + // check if the old workflow contains the same operator content + val newOpClassId = if (oldOp == null) { + getNextClassId() // operator not found, create a new class + } else{ + // check its inputs are all in the same equivalence class + val oldId = "old-" + oldOp.operatorID + val oldOpUpstreamClasses = oldWorkflow.getUpstream( oldOp.operatorID) + .map(op => equivalenceClass("old-" + op.operatorID)) + if (oldOpUpstreamClasses.equals(newOpUpstreamClasses)) { + equivalenceClass(oldId) // same equivalence 
class + } else { + getNextClassId() // inputs are no the same, new class + } + } + equivalenceClass.put("new-" + opId, newOpClassId) + }) + + // for each cached operator in the old workflow, + // check if it can be still used in the new workflow + oldWorkflow.cachedOperatorIds.map(cachedOpId => { + val oldCachedOpId = "old-" + cachedOpId + // find its equivalence class + val oldClassId = equivalenceClass(oldCachedOpId) + // find the corresponding operator that can still use this cache + val newOpId = equivalenceClass.find(p => p._2 == oldClassId && p._1 != oldCachedOpId) + .map(p => p._1).orNull + (newOpId, cachedOpId) + }).filter(p => p._1 != null && p._2 != null).toMap + } + +} + diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheService.scala index 68570aaf185..b9ef3e047ea 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheService.scala @@ -6,7 +6,7 @@ import edu.uci.ics.texera.web.model.websocket.event.CacheStatusUpdateEvent import edu.uci.ics.texera.web.{SubscriptionManager, WebsocketInput} import edu.uci.ics.texera.web.model.websocket.request.CacheStatusUpdateRequest import edu.uci.ics.texera.web.storage.WorkflowStateStore -import edu.uci.ics.texera.web.workflowcachestate.CacheState.{INVALID, VALID} +//import edu.uci.ics.texera.web.workflowcachestate.CacheState.{INVALID, VALID} import edu.uci.ics.texera.workflow.common.operators.OperatorDescriptor import edu.uci.ics.texera.workflow.common.storage.OpResultStorage import edu.uci.ics.texera.workflow.common.workflow.{LogicalPlan, WorkflowRewriter, WorkflowVertex} @@ -26,59 +26,63 @@ class WorkflowCacheService( ) extends SubscriptionManager with LazyLogging { - val cachedOperators: mutable.HashMap[String, OperatorDescriptor] = - mutable.HashMap[String, OperatorDescriptor]() - val cacheSourceOperators: mutable.HashMap[String, CacheSourceOpDesc] = - mutable.HashMap[String, CacheSourceOpDesc]() - val cacheSinkOperators: mutable.HashMap[String, ProgressiveSinkOpDesc] = - mutable.HashMap[String, ProgressiveSinkOpDesc]() - val operatorRecord: mutable.HashMap[String, WorkflowVertex] = - mutable.HashMap[String, WorkflowVertex]() + var lastLogicalPlan: LogicalPlan = null + +// val cachedOperators: mutable.HashMap[String, OperatorDescriptor] = +// mutable.HashMap[String, OperatorDescriptor]() +// val cacheSourceOperators: mutable.HashMap[String, CacheSourceOpDesc] = +// mutable.HashMap[String, CacheSourceOpDesc]() +// val cacheSinkOperators: mutable.HashMap[String, ProgressiveSinkOpDesc] = +// mutable.HashMap[String, ProgressiveSinkOpDesc]() +// val operatorRecord: mutable.HashMap[String, WorkflowVertex] = +// mutable.HashMap[String, WorkflowVertex]() + +// addSubscription( +// stateStore.cacheStore.registerDiffHandler((oldState, newState) => { +// Iterable(CacheStatusUpdateEvent(newState.operatorInfo.map { +// case (k, v) => (k, if (v.isInvalid) "cache invalid" else "cache valid") +// })) +// }) +// ) + - addSubscription( - stateStore.cacheStore.registerDiffHandler((oldState, newState) => { - Iterable(CacheStatusUpdateEvent(newState.operatorInfo.map { - case (k, v) => (k, if (v.isInvalid) "cache invalid" else "cache valid") - })) - }) - ) addSubscription(wsInput.subscribe((req: CacheStatusUpdateRequest, uidOpt) => { - updateCacheStatus(req) +// updateCacheStatus(req) })) - def updateCacheStatus(request: 
CacheStatusUpdateRequest): Unit = { - val logicalPlan = - LogicalPlan(request.operators, request.links, request.breakpoints, request.cachedOperatorIds) - logicalPlan.cachedOperatorIds = request.cachedOperatorIds - logger.debug(s"Cached operators: $cachedOperators with ${request.cachedOperatorIds}") - val workflowRewriter = new WorkflowRewriter( - logicalPlan, - cachedOperators.clone(), - cacheSourceOperators.clone(), - cacheSinkOperators.clone(), - operatorRecord.clone(), - opResultStorage - ) - - val invalidSet = workflowRewriter.cacheStatusUpdate() - stateStore.cacheStore.updateState { oldState => - oldState.withOperatorInfo( - request.cachedOperatorIds - .filter(cachedOperators.contains) - .map(id => { - if (cachedOperators.contains(id)) { - if (!invalidSet.contains(id)) { - (id, VALID) - } else { - (id, INVALID) - } - } else { - (id, INVALID) - } - }) - .toMap - ) - } - } +// def updateCacheStatus(request: CacheStatusUpdateRequest): Unit = { +// val logicalPlan = +// LogicalPlan(request.operators, request.links, request.breakpoints, request.cachedOperatorIds) +// logicalPlan.cachedOperatorIds = request.cachedOperatorIds +// logger.debug(s"Cached operators: $cachedOperators with ${request.cachedOperatorIds}") +// val workflowRewriter = new WorkflowRewriter( +// logicalPlan, +// cachedOperators.clone(), +// cacheSourceOperators.clone(), +// cacheSinkOperators.clone(), +// operatorRecord.clone(), +// opResultStorage +// ) +// +// val invalidSet = workflowRewriter.cacheStatusUpdate() +// stateStore.cacheStore.updateState { oldState => +// oldState.withOperatorInfo( +// request.cachedOperatorIds +// .filter(cachedOperators.contains) +// .map(id => { +// if (cachedOperators.contains(id)) { +// if (!invalidSet.contains(id)) { +// (id, VALID) +// } else { +// (id, INVALID) +// } +// } else { +// (id, INVALID) +// } +// }) +// .toMap +// ) +// } +// } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala index 04b07a62623..2c7785948f2 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala @@ -22,7 +22,7 @@ import edu.uci.ics.texera.workflow.operators.udf.python.{ class WorkflowJobService( workflowContext: WorkflowContext, wsInput: WebsocketInput, - resultService: JobResultService, + resultService: WorkflowResultService, request: WorkflowExecuteRequest, errorHandler: Throwable => Unit, engineVersion: String diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowResultService.scala similarity index 92% rename from core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala rename to core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowResultService.scala index a2f7ade73ed..2d7b25840d7 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowResultService.scala @@ -8,15 +8,10 @@ import edu.uci.ics.amber.engine.architecture.controller.promisehandlers.FatalErr import edu.uci.ics.amber.engine.common.AmberUtils import edu.uci.ics.amber.engine.common.client.AmberClient import edu.uci.ics.amber.engine.common.tuple.ITuple -import edu.uci.ics.texera.web.model.websocket.event.{ - PaginatedResultEvent, - 
TexeraWebSocketEvent, - WebResultUpdateEvent -} +import edu.uci.ics.texera.web.model.websocket.event.{PaginatedResultEvent, TexeraWebSocketEvent, WebResultUpdateEvent} import edu.uci.ics.texera.web.model.websocket.request.ResultPaginationRequest -import edu.uci.ics.texera.web.service.JobResultService.WebResultUpdate -import edu.uci.ics.texera.web.storage.{JobStateStore, WorkflowStateStore} -import edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata +import edu.uci.ics.texera.web.service.WorkflowResultService.WebResultUpdate +import edu.uci.ics.texera.web.storage.{JobStateStore, OperatorResultMetadata, WorkflowResultStore, WorkflowStateStore} import edu.uci.ics.texera.web.workflowruntimestate.JobMetadataStore import edu.uci.ics.texera.web.workflowruntimestate.WorkflowAggregatedState.RUNNING import edu.uci.ics.texera.web.{SubscriptionManager, TexeraWebApplication} @@ -31,7 +26,7 @@ import java.util.UUID import scala.collection.mutable import scala.concurrent.duration.DurationInt -object JobResultService { +object WorkflowResultService { val defaultPageSize: Int = 5 @@ -118,7 +113,7 @@ object JobResultService { * - update the result data for each operator, * - send result update event to the frontend */ -class JobResultService( +class WorkflowResultService( val opResultStorage: OpResultStorage, val workflowStateStore: WorkflowStateStore ) extends SubscriptionManager { @@ -181,9 +176,9 @@ class JobResultService( addSubscription( workflowStateStore.resultStore.registerDiffHandler((oldState, newState) => { val buf = mutable.HashMap[String, WebResultUpdate]() - newState.operatorInfo.foreach { + newState.resultInfo.foreach { case (opId, info) => - val oldInfo = oldState.operatorInfo.getOrElse(opId, new OperatorResultMetadata()) + val oldInfo = oldState.resultInfo.getOrElse(opId, OperatorResultMetadata()) buf(opId) = progressiveResults(opId).convertWebResultUpdate(oldInfo.tupleCount, info.tupleCount) } @@ -193,8 +188,8 @@ class JobResultService( // first clear all the results progressiveResults.clear() - workflowStateStore.resultStore.updateState { state => - state.withOperatorInfo(Map.empty) + workflowStateStore.resultStore.updateState { _ => + WorkflowResultStore() // empty result store } // If we have cache sources, make dummy sink operators for displaying results on the frontend. 
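// A minimal usage sketch (not part of this patch) of the new plain-Scala result store:
// each update replaces the whole WorkflowResultStore value instead of mutating the old
// protobuf-generated state via withOperatorInfo; the operator ID "op-1" is illustrative only.
val resultStore = new StateStore(WorkflowResultStore())
resultStore.updateState { _ =>
  WorkflowResultStore(Map("op-1" -> OperatorResultMetadata(tupleCount = 10)))
}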
@@ -243,7 +238,7 @@ class JobResultService( } def onResultUpdate(): Unit = { - workflowStateStore.resultStore.updateState { oldState => + workflowStateStore.resultStore.updateState { _ => val newInfo: Map[String, OperatorResultMetadata] = progressiveResults.map { case (id, service) => val count = service.sink.getStorage.getCount.toInt @@ -254,7 +249,7 @@ class JobResultService( } else "" (id, OperatorResultMetadata(count, changeDetector)) }.toMap - oldState.withOperatorInfo(newInfo) + WorkflowResultStore(newInfo) } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowService.scala index 17624b9d92c..eeaeb626d58 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowService.scala @@ -7,7 +7,6 @@ import scala.collection.JavaConverters._ import edu.uci.ics.texera.web.model.websocket.event.{TexeraWebSocketEvent, WorkflowErrorEvent} import edu.uci.ics.texera.web.{SubscriptionManager, WebsocketInput, WorkflowLifecycleManager} import edu.uci.ics.texera.web.model.websocket.request.{ - CacheStatusUpdateRequest, WorkflowExecuteRequest, WorkflowKillRequest } @@ -69,8 +68,8 @@ class WorkflowService( } val wsInput = new WebsocketInput(errorHandler) val stateStore = new WorkflowStateStore() - val resultService: JobResultService = - new JobResultService(opResultStorage, stateStore) + val resultService: WorkflowResultService = + new WorkflowResultService(opResultStorage, stateStore) val exportService: ResultExportService = new ResultExportService(opResultStorage, UInteger.valueOf(wId)) val operatorCache: WorkflowCacheService = @@ -124,20 +123,9 @@ class WorkflowService( } private[this] def createWorkflowContext( - request: WorkflowExecuteRequest, uidOpt: Option[UInteger] ): WorkflowContext = { val jobID: String = String.valueOf(WorkflowWebsocketResource.nextExecutionID.incrementAndGet) - if (WorkflowCacheService.isAvailable) { - operatorCache.updateCacheStatus( - CacheStatusUpdateRequest( - request.logicalPlan.operators, - request.logicalPlan.links, - request.logicalPlan.breakpoints, - request.logicalPlan.cachedOperatorIds - ) - ) - } new WorkflowContext(jobID, uidOpt, UInteger.valueOf(wId)) } @@ -148,9 +136,8 @@ class WorkflowService( } val job = new WorkflowJobService( - createWorkflowContext(req, uidOpt), + createWorkflowContext(uidOpt), wsInput, - operatorCache, resultService, req, errorHandler, diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/storage/WorkflowResultStore.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/storage/WorkflowResultStore.scala new file mode 100644 index 00000000000..dfea2230128 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/storage/WorkflowResultStore.scala @@ -0,0 +1,7 @@ +package edu.uci.ics.texera.web.storage + +case class OperatorResultMetadata(tupleCount: Int = 0, changeDetector: String = "") + +case class WorkflowResultStore ( + resultInfo: Map[String, OperatorResultMetadata] = Map.empty +) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/storage/WorkflowStateStore.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/storage/WorkflowStateStore.scala index 916800a28ef..c11f292f653 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/storage/WorkflowStateStore.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/storage/WorkflowStateStore.scala @@ -1,15 +1,11 @@ package 
edu.uci.ics.texera.web.storage -import edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore -import edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore - // states that across executions. class WorkflowStateStore { - val cacheStore = new StateStore(WorkflowCacheStore()) val resultStore = new StateStore(WorkflowResultStore()) def getAllStores: Iterable[StateStore[_]] = { - Iterable(cacheStore, resultStore) + Iterable(resultStore) } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/storage/OpResultStorage.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/storage/OpResultStorage.scala index cbd5665d3f1..ab8d211186d 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/storage/OpResultStorage.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/storage/OpResultStorage.scala @@ -50,6 +50,11 @@ class OpResultStorage extends Serializable with LazyLogging { new MemoryStorage(schema) } } + // remove any existing storage with duplicate key + if (cache.containsKey(key)) { + cache.get(key).clear() // clear the storage content + cache.remove(key) + } cache.put(key, storage) storage } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala index a53e988689b..b37d4e16438 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala @@ -15,7 +15,7 @@ import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc import edu.uci.ics.texera.workflow.operators.visualization.VisualizationConstants import org.jgrapht.graph.DirectedAcyclicGraph -import scala.collection.mutable +import scala.collection.{JavaConverters, mutable} import scala.collection.mutable.ArrayBuffer case class BreakpointInfo(operatorID: String, breakpoint: Breakpoint) @@ -40,8 +40,7 @@ object LogicalPlan { } def apply(pojo: LogicalPlanPojo): LogicalPlan = - LogicalPlan(pojo.operators, pojo.links, pojo.breakpoints, List()) - + LogicalPlan(pojo.operators, pojo.links, pojo.breakpoints, List()).normalize() } case class LogicalPlan( @@ -85,6 +84,10 @@ case class LogicalPlan( def getTerminalOperators: List[String] = this.terminalOperators + def getAncestorOpIds(operatorID: String): Set[String] = { + JavaConverters.asScalaSet(jgraphtDag.getAncestors(operatorID)).toSet + } + def getUpstream(operatorID: String): List[OperatorDescriptor] = { val upstream = new mutable.MutableList[OperatorDescriptor] jgraphtDag @@ -93,6 +96,10 @@ case class LogicalPlan( upstream.toList } + def getUpstreamEdges(operatorID: String): List[OperatorLink] = { + links.filter(l => l.destination.operatorID == operatorID) + } + def addOperator(operatorDescriptor: OperatorDescriptor): LogicalPlan = { this.copy(operators :+ operatorDescriptor, links, breakpoints, cachedOperatorIds) } @@ -100,9 +107,9 @@ case class LogicalPlan( def removeOperator(operatorId: String): LogicalPlan = { this.copy( operators.filter(o => o.operatorID == operatorId), - links, - breakpoints, - cachedOperatorIds + links.filter(l => l.origin.operatorID == operatorId || l.destination.operatorID == operatorId), + breakpoints.filter(b => b.operatorID == operatorId), + cachedOperatorIds.filter(c => c == operatorId) ) } @@ -130,6 +137,13 @@ case class LogicalPlan( this.copy(operators, newLinks, breakpoints, cachedOperatorIds) } + 
def removeEdge( + edge: OperatorLink + ): LogicalPlan = { + val newLinks = links.filter(l => l != edge) + this.copy(operators, newLinks, breakpoints, cachedOperatorIds) + } + def getDownstream(operatorID: String): List[OperatorDescriptor] = { val downstream = new mutable.MutableList[OperatorDescriptor] jgraphtDag @@ -138,6 +152,10 @@ case class LogicalPlan( downstream.toList } + def getDownstreamEdges(operatorID: String): List[OperatorLink] = { + links.filter(l => l.origin.operatorID == operatorID) + } + def opSchemaInfo(operatorID: String): OperatorSchemaInfo = { val op = operatorMap(operatorID) val inputSchemas: Array[Schema] = @@ -305,4 +323,8 @@ case class LogicalPlan( physicalPlan } + def normalize(): LogicalPlan = { + SinkInjectionTransformer.transform(this) + } + } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala index eb64788d214..e5effeca57c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala @@ -7,27 +7,37 @@ import edu.uci.ics.texera.workflow.operators.visualization.VisualizationOperator object SinkInjectionTransformer { - def transform(logicalPlan: LogicalPlan, context: WorkflowContext): LogicalPlan = { + def transform(logicalPlan: LogicalPlan): LogicalPlan = { var resultPlan = logicalPlan // for any terminal operator without a sink, add a sink - logicalPlan.getTerminalOperators.foreach(terminalOpId => { - val terminalOp = logicalPlan.getOperator(terminalOpId) - if (!terminalOp.isInstanceOf[SinkOpDesc]) { - terminalOp.operatorInfo.outputPorts.indices.foreach(out => { - val sink = new ProgressiveSinkOpDesc() - sink.setContext(context) - resultPlan = resultPlan - .addOperator(sink) - .addEdge(terminalOp.operatorID, sink.operatorID, out, 0) - }) - } + val nonSinkTerminalOps = logicalPlan.getTerminalOperators.filter(opId => + ! logicalPlan.getOperator(opId).isInstanceOf[SinkOpDesc] + ) + // for any operators marked as cache (view result) without a sink, add a sink + val viewResultOps = logicalPlan.cachedOperatorIds.filter(opId => + ! 
logicalPlan.getDownstream(opId).exists(op => op.isInstanceOf[SinkOpDesc])
+    )
+
+    val operatorsToAddSink = (nonSinkTerminalOps ++ viewResultOps).toSet
+    operatorsToAddSink.foreach(opId => {
+      val op = logicalPlan.getOperator(opId)
+      op.operatorInfo.outputPorts.indices.foreach(outPort => {
+        val sink = new ProgressiveSinkOpDesc()
+        resultPlan = resultPlan
+          .addOperator(sink)
+          .addEdge(op.operatorID, sink.operatorID, outPort)
+      })
+    })
+    // check precondition: all the terminal operators should be sinks now
+    assert(resultPlan.getTerminalOperators.forall(o => resultPlan.getOperator(o).isInstanceOf[SinkOpDesc]))
+
+    var finalCachedOpIds = Set[String]()
+
     // for each sink:
     // set the corresponding upstream ID and port
     // set output mode based on the visualization operator before it
-    // precondition: all the terminal operators are sinks
     resultPlan.getTerminalOperators.foreach(sinkOpId => {
       val sinkOp = resultPlan.getOperator(sinkOpId).asInstanceOf[ProgressiveSinkOpDesc]
       val upstream = resultPlan.getUpstream(sinkOpId).headOption
@@ -35,7 +45,9 @@ object SinkInjectionTransformer {
         l.origin.operatorID == upstream.map(_.operatorID).orNull && l.destination.operatorID == sinkOpId
       )
+      assert(upstream.nonEmpty)
       if (upstream.nonEmpty && edge.nonEmpty) {
+        finalCachedOpIds += upstream.get
         // set upstream ID and port
         sinkOp.setUpstreamId(upstream.get.operatorID)
         sinkOp.setUpstreamPort(edge.get.origin.portOrdinal)
@@ -51,7 +63,7 @@ object SinkInjectionTransformer {
       }
     })
 
-    resultPlan
+    resultPlan.copy(cachedOperatorIds = finalCachedOpIds.toList)
   }
 
 }
diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCacheRewriter.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCacheRewriter.scala
new file mode 100644
index 00000000000..a325e343496
--- /dev/null
+++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCacheRewriter.scala
@@ -0,0 +1,50 @@
+package edu.uci.ics.texera.workflow.common.workflow
+
+import edu.uci.ics.texera.workflow.common.storage.OpResultStorage
+import edu.uci.ics.texera.workflow.operators.sink.SinkOpDesc
+import edu.uci.ics.texera.workflow.operators.source.cache.CacheSourceOpDesc
+
+object WorkflowCacheRewriter {
+
+  def transform(
+      logicalPlan: LogicalPlan,
+      storage: OpResultStorage,
+      availableCache: Map[String, String], // key: operator ID in workflow, value: cache key in storage
+      operatorsToUseCache: Set[String], // user-specified operators to reuse cache if possible
+  ): LogicalPlan = {
+    var resultPlan = logicalPlan
+
+    operatorsToUseCache.intersect(availableCache.keySet).foreach(opId => {
+      val cacheId = availableCache(opId)
+      val materializationReader = new CacheSourceOpDesc(cacheId, storage)
+      resultPlan = resultPlan.addOperator(materializationReader)
+      // replace the connection of all outgoing edges of opId with the cache
+      val edgesToReplace = resultPlan.getUpstreamEdges(opId)
+      edgesToReplace.foreach(e => {
+        resultPlan = resultPlan.removeEdge(e.origin.operatorID, e.destination.operatorID,
+          e.origin.portOrdinal, e.destination.portOrdinal)
+        resultPlan = resultPlan.addEdge(materializationReader.operatorID, e.destination.operatorID,
+          0, e.destination.portOrdinal)
+      })
+    })
+
+    // remove sinks directly connected to operators that are already cached
+    val unnecessarySinks = resultPlan.getTerminalOperators.filter(sink => {
+      availableCache.contains(resultPlan.getUpstream(sink).head.operatorID)
+    })
+    unnecessarySinks.foreach(o => {
+      resultPlan = resultPlan.removeOperator(o)
+    })
+
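// Sketch (not part of this change): intended invocation of the rewriter above, assuming it runs
// during workflow compilation after sink injection; planWithSinks, opResultStorage, the operator
// ID "filter-op", and the cache key "cache-key-1" are made-up placeholder names.
val rewrittenPlan = WorkflowCacheRewriter.transform(
  logicalPlan = planWithSinks,
  storage = opResultStorage,
  availableCache = Map("filter-op" -> "cache-key-1"),
  operatorsToUseCache = Set("filter-op")
)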
+ // operators that are no longer reachable by any sink don't need to run + val allOperators = resultPlan.operators.map(op => op.operatorID).toSet + assert(allOperators.forall(o => resultPlan.getOperator(o).isInstanceOf[SinkOpDesc])) + val usefulOperators = resultPlan.terminalOperators.flatMap(o => resultPlan.getAncestorOpIds(o)).toSet + allOperators.diff(usefulOperators).foreach(o => { + resultPlan = resultPlan.removeOperator(o) + }) + + resultPlan + } + +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/cache/CacheSourceOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/cache/CacheSourceOpDesc.scala index 2d2473ed033..9f6273754a7 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/cache/CacheSourceOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/cache/CacheSourceOpDesc.scala @@ -34,7 +34,7 @@ class CacheSourceOpDesc(val targetSinkStorageId: String, opResultStorage: OpResu OperatorInfo( "Cache Source Operator", "Retrieve the cached output to src", - OperatorGroupConstants.RESULT_GROUP, + OperatorGroupConstants.UTILITY_GROUP, List.empty, asScalaBuffer(singletonList(OutputPort(""))).toList ) diff --git a/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowcachestate/CacheState.scala b/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowcachestate/CacheState.scala deleted file mode 100644 index e0a028546ce..00000000000 --- a/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowcachestate/CacheState.scala +++ /dev/null @@ -1,44 +0,0 @@ -// Generated by the Scala Plugin for the Protocol Buffer Compiler. -// Do not edit! -// -// Protofile syntax: PROTO3 - -package edu.uci.ics.texera.web.workflowcachestate - -sealed abstract class CacheState(val value: _root_.scala.Int) extends _root_.scalapb.GeneratedEnum { - type EnumType = CacheState - def isValid: _root_.scala.Boolean = false - def isInvalid: _root_.scala.Boolean = false - def companion: _root_.scalapb.GeneratedEnumCompanion[CacheState] = edu.uci.ics.texera.web.workflowcachestate.CacheState - final def asRecognized: _root_.scala.Option[edu.uci.ics.texera.web.workflowcachestate.CacheState.Recognized] = if (isUnrecognized) _root_.scala.None else _root_.scala.Some(this.asInstanceOf[edu.uci.ics.texera.web.workflowcachestate.CacheState.Recognized]) -} - -object CacheState extends _root_.scalapb.GeneratedEnumCompanion[CacheState] { - sealed trait Recognized extends CacheState - implicit def enumCompanion: _root_.scalapb.GeneratedEnumCompanion[CacheState] = this - @SerialVersionUID(0L) - case object VALID extends CacheState(0) with CacheState.Recognized { - val index = 0 - val name = "VALID" - override def isValid: _root_.scala.Boolean = true - } - - @SerialVersionUID(0L) - case object INVALID extends CacheState(1) with CacheState.Recognized { - val index = 1 - val name = "INVALID" - override def isInvalid: _root_.scala.Boolean = true - } - - @SerialVersionUID(0L) - final case class Unrecognized(unrecognizedValue: _root_.scala.Int) extends CacheState(unrecognizedValue) with _root_.scalapb.UnrecognizedEnum - - lazy val values = scala.collection.immutable.Seq(VALID, INVALID) - def fromValue(__value: _root_.scala.Int): CacheState = __value match { - case 0 => VALID - case 1 => INVALID - case __other => Unrecognized(__other) - } - def javaDescriptor: _root_.com.google.protobuf.Descriptors.EnumDescriptor = WorkflowcachestateProto.javaDescriptor.getEnumTypes().get(0) - def scalaDescriptor: 
_root_.scalapb.descriptors.EnumDescriptor = WorkflowcachestateProto.scalaDescriptor.enums(0) -} \ No newline at end of file diff --git a/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowcachestate/WorkflowCacheStore.scala b/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowcachestate/WorkflowCacheStore.scala deleted file mode 100644 index e7dc8d7d512..00000000000 --- a/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowcachestate/WorkflowCacheStore.scala +++ /dev/null @@ -1,249 +0,0 @@ -// Generated by the Scala Plugin for the Protocol Buffer Compiler. -// Do not edit! -// -// Protofile syntax: PROTO3 - -package edu.uci.ics.texera.web.workflowcachestate - -@SerialVersionUID(0L) -final case class WorkflowCacheStore( - operatorInfo: _root_.scala.collection.immutable.Map[_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowcachestate.CacheState] = _root_.scala.collection.immutable.Map.empty - ) extends scalapb.GeneratedMessage with scalapb.lenses.Updatable[WorkflowCacheStore] { - @transient - private[this] var __serializedSizeCachedValue: _root_.scala.Int = 0 - private[this] def __computeSerializedValue(): _root_.scala.Int = { - var __size = 0 - operatorInfo.foreach { __item => - val __value = edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore._typemapper_operatorInfo.toBase(__item) - __size += 1 + _root_.com.google.protobuf.CodedOutputStream.computeUInt32SizeNoTag(__value.serializedSize) + __value.serializedSize - } - __size - } - override def serializedSize: _root_.scala.Int = { - var read = __serializedSizeCachedValue - if (read == 0) { - read = __computeSerializedValue() - __serializedSizeCachedValue = read - } - read - } - def writeTo(`_output__`: _root_.com.google.protobuf.CodedOutputStream): _root_.scala.Unit = { - operatorInfo.foreach { __v => - val __m = edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore._typemapper_operatorInfo.toBase(__v) - _output__.writeTag(1, 2) - _output__.writeUInt32NoTag(__m.serializedSize) - __m.writeTo(_output__) - }; - } - def clearOperatorInfo = copy(operatorInfo = _root_.scala.collection.immutable.Map.empty) - def addOperatorInfo(__vs: (_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowcachestate.CacheState)*): WorkflowCacheStore = addAllOperatorInfo(__vs) - def addAllOperatorInfo(__vs: Iterable[(_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowcachestate.CacheState)]): WorkflowCacheStore = copy(operatorInfo = operatorInfo ++ __vs) - def withOperatorInfo(__v: _root_.scala.collection.immutable.Map[_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowcachestate.CacheState]): WorkflowCacheStore = copy(operatorInfo = __v) - def getFieldByNumber(__fieldNumber: _root_.scala.Int): _root_.scala.Any = { - (__fieldNumber: @_root_.scala.unchecked) match { - case 1 => operatorInfo.iterator.map(edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore._typemapper_operatorInfo.toBase(_)).toSeq - } - } - def getField(__field: _root_.scalapb.descriptors.FieldDescriptor): _root_.scalapb.descriptors.PValue = { - _root_.scala.Predef.require(__field.containingMessage eq companion.scalaDescriptor) - (__field.number: @_root_.scala.unchecked) match { - case 1 => _root_.scalapb.descriptors.PRepeated(operatorInfo.iterator.map(edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore._typemapper_operatorInfo.toBase(_).toPMessage).toVector) - } - } - def toProtoString: _root_.scala.Predef.String = _root_.scalapb.TextFormat.printToSingleLineUnicodeString(this) - def companion = 
edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore - // @@protoc_insertion_point(GeneratedMessage[edu.uci.ics.texera.web.WorkflowCacheStore]) -} - -object WorkflowCacheStore extends scalapb.GeneratedMessageCompanion[edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore] { - implicit def messageCompanion: scalapb.GeneratedMessageCompanion[edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore] = this - def parseFrom(`_input__`: _root_.com.google.protobuf.CodedInputStream): edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore = { - val __operatorInfo: _root_.scala.collection.mutable.Builder[(_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowcachestate.CacheState), _root_.scala.collection.immutable.Map[_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowcachestate.CacheState]] = _root_.scala.collection.immutable.Map.newBuilder[_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowcachestate.CacheState] - var _done__ = false - while (!_done__) { - val _tag__ = _input__.readTag() - _tag__ match { - case 0 => _done__ = true - case 10 => - __operatorInfo += edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore._typemapper_operatorInfo.toCustom(_root_.scalapb.LiteParser.readMessage[edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry](_input__)) - case tag => _input__.skipField(tag) - } - } - edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore( - operatorInfo = __operatorInfo.result() - ) - } - implicit def messageReads: _root_.scalapb.descriptors.Reads[edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore] = _root_.scalapb.descriptors.Reads{ - case _root_.scalapb.descriptors.PMessage(__fieldsMap) => - _root_.scala.Predef.require(__fieldsMap.keys.forall(_.containingMessage eq scalaDescriptor), "FieldDescriptor does not match message type.") - edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore( - operatorInfo = __fieldsMap.get(scalaDescriptor.findFieldByNumber(1).get).map(_.as[_root_.scala.Seq[edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry]]).getOrElse(_root_.scala.Seq.empty).iterator.map(edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore._typemapper_operatorInfo.toCustom(_)).toMap - ) - case _ => throw new RuntimeException("Expected PMessage") - } - def javaDescriptor: _root_.com.google.protobuf.Descriptors.Descriptor = WorkflowcachestateProto.javaDescriptor.getMessageTypes().get(0) - def scalaDescriptor: _root_.scalapb.descriptors.Descriptor = WorkflowcachestateProto.scalaDescriptor.messages(0) - def messageCompanionForFieldNumber(__number: _root_.scala.Int): _root_.scalapb.GeneratedMessageCompanion[_] = { - var __out: _root_.scalapb.GeneratedMessageCompanion[_] = null - (__number: @_root_.scala.unchecked) match { - case 1 => __out = edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry - } - __out - } - lazy val nestedMessagesCompanions: Seq[_root_.scalapb.GeneratedMessageCompanion[_ <: _root_.scalapb.GeneratedMessage]] = - Seq[_root_.scalapb.GeneratedMessageCompanion[_ <: _root_.scalapb.GeneratedMessage]]( - _root_.edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry - ) - def enumCompanionForFieldNumber(__fieldNumber: _root_.scala.Int): _root_.scalapb.GeneratedEnumCompanion[_] = throw new MatchError(__fieldNumber) - lazy val defaultInstance = edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore( - operatorInfo = _root_.scala.collection.immutable.Map.empty - ) - @SerialVersionUID(0L) - 
final case class OperatorInfoEntry( - key: _root_.scala.Predef.String = "", - value: edu.uci.ics.texera.web.workflowcachestate.CacheState = edu.uci.ics.texera.web.workflowcachestate.CacheState.VALID - ) extends scalapb.GeneratedMessage with scalapb.lenses.Updatable[OperatorInfoEntry] { - @transient - private[this] var __serializedSizeCachedValue: _root_.scala.Int = 0 - private[this] def __computeSerializedValue(): _root_.scala.Int = { - var __size = 0 - - { - val __value = key - if (!__value.isEmpty) { - __size += _root_.com.google.protobuf.CodedOutputStream.computeStringSize(1, __value) - } - }; - - { - val __value = value.value - if (__value != 0) { - __size += _root_.com.google.protobuf.CodedOutputStream.computeEnumSize(2, __value) - } - }; - __size - } - override def serializedSize: _root_.scala.Int = { - var read = __serializedSizeCachedValue - if (read == 0) { - read = __computeSerializedValue() - __serializedSizeCachedValue = read - } - read - } - def writeTo(`_output__`: _root_.com.google.protobuf.CodedOutputStream): _root_.scala.Unit = { - { - val __v = key - if (!__v.isEmpty) { - _output__.writeString(1, __v) - } - }; - { - val __v = value.value - if (__v != 0) { - _output__.writeEnum(2, __v) - } - }; - } - def withKey(__v: _root_.scala.Predef.String): OperatorInfoEntry = copy(key = __v) - def withValue(__v: edu.uci.ics.texera.web.workflowcachestate.CacheState): OperatorInfoEntry = copy(value = __v) - def getFieldByNumber(__fieldNumber: _root_.scala.Int): _root_.scala.Any = { - (__fieldNumber: @_root_.scala.unchecked) match { - case 1 => { - val __t = key - if (__t != "") __t else null - } - case 2 => { - val __t = value.javaValueDescriptor - if (__t.getNumber() != 0) __t else null - } - } - } - def getField(__field: _root_.scalapb.descriptors.FieldDescriptor): _root_.scalapb.descriptors.PValue = { - _root_.scala.Predef.require(__field.containingMessage eq companion.scalaDescriptor) - (__field.number: @_root_.scala.unchecked) match { - case 1 => _root_.scalapb.descriptors.PString(key) - case 2 => _root_.scalapb.descriptors.PEnum(value.scalaValueDescriptor) - } - } - def toProtoString: _root_.scala.Predef.String = _root_.scalapb.TextFormat.printToSingleLineUnicodeString(this) - def companion = edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry - // @@protoc_insertion_point(GeneratedMessage[edu.uci.ics.texera.web.WorkflowCacheStore.OperatorInfoEntry]) - } - - object OperatorInfoEntry extends scalapb.GeneratedMessageCompanion[edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry] { - implicit def messageCompanion: scalapb.GeneratedMessageCompanion[edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry] = this - def parseFrom(`_input__`: _root_.com.google.protobuf.CodedInputStream): edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry = { - var __key: _root_.scala.Predef.String = "" - var __value: edu.uci.ics.texera.web.workflowcachestate.CacheState = edu.uci.ics.texera.web.workflowcachestate.CacheState.VALID - var _done__ = false - while (!_done__) { - val _tag__ = _input__.readTag() - _tag__ match { - case 0 => _done__ = true - case 10 => - __key = _input__.readStringRequireUtf8() - case 16 => - __value = edu.uci.ics.texera.web.workflowcachestate.CacheState.fromValue(_input__.readEnum()) - case tag => _input__.skipField(tag) - } - } - edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry( - key = __key, - value = __value - ) - } - implicit def 
messageReads: _root_.scalapb.descriptors.Reads[edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry] = _root_.scalapb.descriptors.Reads{ - case _root_.scalapb.descriptors.PMessage(__fieldsMap) => - _root_.scala.Predef.require(__fieldsMap.keys.forall(_.containingMessage eq scalaDescriptor), "FieldDescriptor does not match message type.") - edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry( - key = __fieldsMap.get(scalaDescriptor.findFieldByNumber(1).get).map(_.as[_root_.scala.Predef.String]).getOrElse(""), - value = edu.uci.ics.texera.web.workflowcachestate.CacheState.fromValue(__fieldsMap.get(scalaDescriptor.findFieldByNumber(2).get).map(_.as[_root_.scalapb.descriptors.EnumValueDescriptor]).getOrElse(edu.uci.ics.texera.web.workflowcachestate.CacheState.VALID.scalaValueDescriptor).number) - ) - case _ => throw new RuntimeException("Expected PMessage") - } - def javaDescriptor: _root_.com.google.protobuf.Descriptors.Descriptor = edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.javaDescriptor.getNestedTypes().get(0) - def scalaDescriptor: _root_.scalapb.descriptors.Descriptor = edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.scalaDescriptor.nestedMessages(0) - def messageCompanionForFieldNumber(__number: _root_.scala.Int): _root_.scalapb.GeneratedMessageCompanion[_] = throw new MatchError(__number) - lazy val nestedMessagesCompanions: Seq[_root_.scalapb.GeneratedMessageCompanion[_ <: _root_.scalapb.GeneratedMessage]] = Seq.empty - def enumCompanionForFieldNumber(__fieldNumber: _root_.scala.Int): _root_.scalapb.GeneratedEnumCompanion[_] = { - (__fieldNumber: @_root_.scala.unchecked) match { - case 2 => edu.uci.ics.texera.web.workflowcachestate.CacheState - } - } - lazy val defaultInstance = edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry( - key = "", - value = edu.uci.ics.texera.web.workflowcachestate.CacheState.VALID - ) - implicit class OperatorInfoEntryLens[UpperPB](_l: _root_.scalapb.lenses.Lens[UpperPB, edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry]) extends _root_.scalapb.lenses.ObjectLens[UpperPB, edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry](_l) { - def key: _root_.scalapb.lenses.Lens[UpperPB, _root_.scala.Predef.String] = field(_.key)((c_, f_) => c_.copy(key = f_)) - def value: _root_.scalapb.lenses.Lens[UpperPB, edu.uci.ics.texera.web.workflowcachestate.CacheState] = field(_.value)((c_, f_) => c_.copy(value = f_)) - } - final val KEY_FIELD_NUMBER = 1 - final val VALUE_FIELD_NUMBER = 2 - @transient - implicit val keyValueMapper: _root_.scalapb.TypeMapper[edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry, (_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowcachestate.CacheState)] = - _root_.scalapb.TypeMapper[edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry, (_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowcachestate.CacheState)](__m => (__m.key, __m.value))(__p => edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry(__p._1, __p._2)) - def of( - key: _root_.scala.Predef.String, - value: edu.uci.ics.texera.web.workflowcachestate.CacheState - ): _root_.edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry = _root_.edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry( - key, - value - ) - // 
@@protoc_insertion_point(GeneratedMessageCompanion[edu.uci.ics.texera.web.WorkflowCacheStore.OperatorInfoEntry]) - } - - implicit class WorkflowCacheStoreLens[UpperPB](_l: _root_.scalapb.lenses.Lens[UpperPB, edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore]) extends _root_.scalapb.lenses.ObjectLens[UpperPB, edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore](_l) { - def operatorInfo: _root_.scalapb.lenses.Lens[UpperPB, _root_.scala.collection.immutable.Map[_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowcachestate.CacheState]] = field(_.operatorInfo)((c_, f_) => c_.copy(operatorInfo = f_)) - } - final val OPERATOR_INFO_FIELD_NUMBER = 1 - @transient - private[workflowcachestate] val _typemapper_operatorInfo: _root_.scalapb.TypeMapper[edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry, (_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowcachestate.CacheState)] = implicitly[_root_.scalapb.TypeMapper[edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore.OperatorInfoEntry, (_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowcachestate.CacheState)]] - def of( - operatorInfo: _root_.scala.collection.immutable.Map[_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowcachestate.CacheState] - ): _root_.edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore = _root_.edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore( - operatorInfo - ) - // @@protoc_insertion_point(GeneratedMessageCompanion[edu.uci.ics.texera.web.WorkflowCacheStore]) -} diff --git a/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowcachestate/WorkflowcachestateProto.scala b/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowcachestate/WorkflowcachestateProto.scala deleted file mode 100644 index 9beb6091edd..00000000000 --- a/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowcachestate/WorkflowcachestateProto.scala +++ /dev/null @@ -1,37 +0,0 @@ -// Generated by the Scala Plugin for the Protocol Buffer Compiler. -// Do not edit! 
-// -// Protofile syntax: PROTO3 - -package edu.uci.ics.texera.web.workflowcachestate - -object WorkflowcachestateProto extends _root_.scalapb.GeneratedFileObject { - lazy val dependencies: Seq[_root_.scalapb.GeneratedFileObject] = Seq( - scalapb.options.ScalapbProto - ) - lazy val messagesCompanions: Seq[_root_.scalapb.GeneratedMessageCompanion[_ <: _root_.scalapb.GeneratedMessage]] = - Seq[_root_.scalapb.GeneratedMessageCompanion[_ <: _root_.scalapb.GeneratedMessage]]( - edu.uci.ics.texera.web.workflowcachestate.WorkflowCacheStore - ) - private lazy val ProtoBytes: _root_.scala.Array[Byte] = - scalapb.Encoding.fromBase64(scala.collection.immutable.Seq( - """CitlZHUvdWNpL2ljcy90ZXhlcmEvd29ya2Zsb3djYWNoZXN0YXRlLnByb3RvEhZlZHUudWNpLmljcy50ZXhlcmEud2ViGhVzY - 2FsYXBiL3NjYWxhcGIucHJvdG8ihQIKEldvcmtmbG93Q2FjaGVTdG9yZRJ0Cg1vcGVyYXRvcl9pbmZvGAEgAygLMjwuZWR1LnVja - S5pY3MudGV4ZXJhLndlYi5Xb3JrZmxvd0NhY2hlU3RvcmUuT3BlcmF0b3JJbmZvRW50cnlCEeI/DhIMb3BlcmF0b3JJbmZvUgxvc - GVyYXRvckluZm8aeQoRT3BlcmF0b3JJbmZvRW50cnkSGgoDa2V5GAEgASgJQgjiPwUSA2tleVIDa2V5EkQKBXZhbHVlGAIgASgOM - iIuZWR1LnVjaS5pY3MudGV4ZXJhLndlYi5DYWNoZVN0YXRlQgriPwcSBXZhbHVlUgV2YWx1ZToCOAEqJAoKQ2FjaGVTdGF0ZRIJC - gVWQUxJRBAAEgsKB0lOVkFMSUQQAUIJ4j8GSABYAHgAYgZwcm90bzM=""" - ).mkString) - lazy val scalaDescriptor: _root_.scalapb.descriptors.FileDescriptor = { - val scalaProto = com.google.protobuf.descriptor.FileDescriptorProto.parseFrom(ProtoBytes) - _root_.scalapb.descriptors.FileDescriptor.buildFrom(scalaProto, dependencies.map(_.scalaDescriptor)) - } - lazy val javaDescriptor: com.google.protobuf.Descriptors.FileDescriptor = { - val javaProto = com.google.protobuf.DescriptorProtos.FileDescriptorProto.parseFrom(ProtoBytes) - com.google.protobuf.Descriptors.FileDescriptor.buildFrom(javaProto, _root_.scala.Array( - scalapb.options.ScalapbProto.javaDescriptor - )) - } - @deprecated("Use javaDescriptor instead. In a future version this will refer to scalaDescriptor.", "ScalaPB 0.5.47") - def descriptor: com.google.protobuf.Descriptors.FileDescriptor = javaDescriptor -} \ No newline at end of file diff --git a/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowresultstate/OperatorResultMetadata.scala b/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowresultstate/OperatorResultMetadata.scala deleted file mode 100644 index 054fef80819..00000000000 --- a/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowresultstate/OperatorResultMetadata.scala +++ /dev/null @@ -1,135 +0,0 @@ -// Generated by the Scala Plugin for the Protocol Buffer Compiler. -// Do not edit! 
-// -// Protofile syntax: PROTO3 - -package edu.uci.ics.texera.web.workflowresultstate - -@SerialVersionUID(0L) -final case class OperatorResultMetadata( - tupleCount: _root_.scala.Int = 0, - changeDetector: _root_.scala.Predef.String = "" - ) extends scalapb.GeneratedMessage with scalapb.lenses.Updatable[OperatorResultMetadata] { - @transient - private[this] var __serializedSizeCachedValue: _root_.scala.Int = 0 - private[this] def __computeSerializedValue(): _root_.scala.Int = { - var __size = 0 - - { - val __value = tupleCount - if (__value != 0) { - __size += _root_.com.google.protobuf.CodedOutputStream.computeInt32Size(1, __value) - } - }; - - { - val __value = changeDetector - if (!__value.isEmpty) { - __size += _root_.com.google.protobuf.CodedOutputStream.computeStringSize(2, __value) - } - }; - __size - } - override def serializedSize: _root_.scala.Int = { - var read = __serializedSizeCachedValue - if (read == 0) { - read = __computeSerializedValue() - __serializedSizeCachedValue = read - } - read - } - def writeTo(`_output__`: _root_.com.google.protobuf.CodedOutputStream): _root_.scala.Unit = { - { - val __v = tupleCount - if (__v != 0) { - _output__.writeInt32(1, __v) - } - }; - { - val __v = changeDetector - if (!__v.isEmpty) { - _output__.writeString(2, __v) - } - }; - } - def withTupleCount(__v: _root_.scala.Int): OperatorResultMetadata = copy(tupleCount = __v) - def withChangeDetector(__v: _root_.scala.Predef.String): OperatorResultMetadata = copy(changeDetector = __v) - def getFieldByNumber(__fieldNumber: _root_.scala.Int): _root_.scala.Any = { - (__fieldNumber: @_root_.scala.unchecked) match { - case 1 => { - val __t = tupleCount - if (__t != 0) __t else null - } - case 2 => { - val __t = changeDetector - if (__t != "") __t else null - } - } - } - def getField(__field: _root_.scalapb.descriptors.FieldDescriptor): _root_.scalapb.descriptors.PValue = { - _root_.scala.Predef.require(__field.containingMessage eq companion.scalaDescriptor) - (__field.number: @_root_.scala.unchecked) match { - case 1 => _root_.scalapb.descriptors.PInt(tupleCount) - case 2 => _root_.scalapb.descriptors.PString(changeDetector) - } - } - def toProtoString: _root_.scala.Predef.String = _root_.scalapb.TextFormat.printToSingleLineUnicodeString(this) - def companion = edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata - // @@protoc_insertion_point(GeneratedMessage[edu.uci.ics.texera.web.OperatorResultMetadata]) -} - -object OperatorResultMetadata extends scalapb.GeneratedMessageCompanion[edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata] { - implicit def messageCompanion: scalapb.GeneratedMessageCompanion[edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata] = this - def parseFrom(`_input__`: _root_.com.google.protobuf.CodedInputStream): edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata = { - var __tupleCount: _root_.scala.Int = 0 - var __changeDetector: _root_.scala.Predef.String = "" - var _done__ = false - while (!_done__) { - val _tag__ = _input__.readTag() - _tag__ match { - case 0 => _done__ = true - case 8 => - __tupleCount = _input__.readInt32() - case 18 => - __changeDetector = _input__.readStringRequireUtf8() - case tag => _input__.skipField(tag) - } - } - edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata( - tupleCount = __tupleCount, - changeDetector = __changeDetector - ) - } - implicit def messageReads: _root_.scalapb.descriptors.Reads[edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata] = 
_root_.scalapb.descriptors.Reads{ - case _root_.scalapb.descriptors.PMessage(__fieldsMap) => - _root_.scala.Predef.require(__fieldsMap.keys.forall(_.containingMessage eq scalaDescriptor), "FieldDescriptor does not match message type.") - edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata( - tupleCount = __fieldsMap.get(scalaDescriptor.findFieldByNumber(1).get).map(_.as[_root_.scala.Int]).getOrElse(0), - changeDetector = __fieldsMap.get(scalaDescriptor.findFieldByNumber(2).get).map(_.as[_root_.scala.Predef.String]).getOrElse("") - ) - case _ => throw new RuntimeException("Expected PMessage") - } - def javaDescriptor: _root_.com.google.protobuf.Descriptors.Descriptor = WorkflowresultstateProto.javaDescriptor.getMessageTypes().get(1) - def scalaDescriptor: _root_.scalapb.descriptors.Descriptor = WorkflowresultstateProto.scalaDescriptor.messages(1) - def messageCompanionForFieldNumber(__number: _root_.scala.Int): _root_.scalapb.GeneratedMessageCompanion[_] = throw new MatchError(__number) - lazy val nestedMessagesCompanions: Seq[_root_.scalapb.GeneratedMessageCompanion[_ <: _root_.scalapb.GeneratedMessage]] = Seq.empty - def enumCompanionForFieldNumber(__fieldNumber: _root_.scala.Int): _root_.scalapb.GeneratedEnumCompanion[_] = throw new MatchError(__fieldNumber) - lazy val defaultInstance = edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata( - tupleCount = 0, - changeDetector = "" - ) - implicit class OperatorResultMetadataLens[UpperPB](_l: _root_.scalapb.lenses.Lens[UpperPB, edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata]) extends _root_.scalapb.lenses.ObjectLens[UpperPB, edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata](_l) { - def tupleCount: _root_.scalapb.lenses.Lens[UpperPB, _root_.scala.Int] = field(_.tupleCount)((c_, f_) => c_.copy(tupleCount = f_)) - def changeDetector: _root_.scalapb.lenses.Lens[UpperPB, _root_.scala.Predef.String] = field(_.changeDetector)((c_, f_) => c_.copy(changeDetector = f_)) - } - final val TUPLE_COUNT_FIELD_NUMBER = 1 - final val CHANGE_DETECTOR_FIELD_NUMBER = 2 - def of( - tupleCount: _root_.scala.Int, - changeDetector: _root_.scala.Predef.String - ): _root_.edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata = _root_.edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata( - tupleCount, - changeDetector - ) - // @@protoc_insertion_point(GeneratedMessageCompanion[edu.uci.ics.texera.web.OperatorResultMetadata]) -} diff --git a/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowresultstate/WorkflowResultStore.scala b/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowresultstate/WorkflowResultStore.scala deleted file mode 100644 index d514ae7bae3..00000000000 --- a/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowresultstate/WorkflowResultStore.scala +++ /dev/null @@ -1,248 +0,0 @@ -// Generated by the Scala Plugin for the Protocol Buffer Compiler. -// Do not edit! 
-// -// Protofile syntax: PROTO3 - -package edu.uci.ics.texera.web.workflowresultstate - -@SerialVersionUID(0L) -final case class WorkflowResultStore( - operatorInfo: _root_.scala.collection.immutable.Map[_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata] = _root_.scala.collection.immutable.Map.empty - ) extends scalapb.GeneratedMessage with scalapb.lenses.Updatable[WorkflowResultStore] { - @transient - private[this] var __serializedSizeCachedValue: _root_.scala.Int = 0 - private[this] def __computeSerializedValue(): _root_.scala.Int = { - var __size = 0 - operatorInfo.foreach { __item => - val __value = edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore._typemapper_operatorInfo.toBase(__item) - __size += 1 + _root_.com.google.protobuf.CodedOutputStream.computeUInt32SizeNoTag(__value.serializedSize) + __value.serializedSize - } - __size - } - override def serializedSize: _root_.scala.Int = { - var read = __serializedSizeCachedValue - if (read == 0) { - read = __computeSerializedValue() - __serializedSizeCachedValue = read - } - read - } - def writeTo(`_output__`: _root_.com.google.protobuf.CodedOutputStream): _root_.scala.Unit = { - operatorInfo.foreach { __v => - val __m = edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore._typemapper_operatorInfo.toBase(__v) - _output__.writeTag(1, 2) - _output__.writeUInt32NoTag(__m.serializedSize) - __m.writeTo(_output__) - }; - } - def clearOperatorInfo = copy(operatorInfo = _root_.scala.collection.immutable.Map.empty) - def addOperatorInfo(__vs: (_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata)*): WorkflowResultStore = addAllOperatorInfo(__vs) - def addAllOperatorInfo(__vs: Iterable[(_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata)]): WorkflowResultStore = copy(operatorInfo = operatorInfo ++ __vs) - def withOperatorInfo(__v: _root_.scala.collection.immutable.Map[_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata]): WorkflowResultStore = copy(operatorInfo = __v) - def getFieldByNumber(__fieldNumber: _root_.scala.Int): _root_.scala.Any = { - (__fieldNumber: @_root_.scala.unchecked) match { - case 1 => operatorInfo.iterator.map(edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore._typemapper_operatorInfo.toBase(_)).toSeq - } - } - def getField(__field: _root_.scalapb.descriptors.FieldDescriptor): _root_.scalapb.descriptors.PValue = { - _root_.scala.Predef.require(__field.containingMessage eq companion.scalaDescriptor) - (__field.number: @_root_.scala.unchecked) match { - case 1 => _root_.scalapb.descriptors.PRepeated(operatorInfo.iterator.map(edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore._typemapper_operatorInfo.toBase(_).toPMessage).toVector) - } - } - def toProtoString: _root_.scala.Predef.String = _root_.scalapb.TextFormat.printToSingleLineUnicodeString(this) - def companion = edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore - // @@protoc_insertion_point(GeneratedMessage[edu.uci.ics.texera.web.WorkflowResultStore]) -} - -object WorkflowResultStore extends scalapb.GeneratedMessageCompanion[edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore] { - implicit def messageCompanion: scalapb.GeneratedMessageCompanion[edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore] = this - def parseFrom(`_input__`: _root_.com.google.protobuf.CodedInputStream): 
edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore = { - val __operatorInfo: _root_.scala.collection.mutable.Builder[(_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata), _root_.scala.collection.immutable.Map[_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata]] = _root_.scala.collection.immutable.Map.newBuilder[_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata] - var _done__ = false - while (!_done__) { - val _tag__ = _input__.readTag() - _tag__ match { - case 0 => _done__ = true - case 10 => - __operatorInfo += edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore._typemapper_operatorInfo.toCustom(_root_.scalapb.LiteParser.readMessage[edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry](_input__)) - case tag => _input__.skipField(tag) - } - } - edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore( - operatorInfo = __operatorInfo.result() - ) - } - implicit def messageReads: _root_.scalapb.descriptors.Reads[edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore] = _root_.scalapb.descriptors.Reads{ - case _root_.scalapb.descriptors.PMessage(__fieldsMap) => - _root_.scala.Predef.require(__fieldsMap.keys.forall(_.containingMessage eq scalaDescriptor), "FieldDescriptor does not match message type.") - edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore( - operatorInfo = __fieldsMap.get(scalaDescriptor.findFieldByNumber(1).get).map(_.as[_root_.scala.Seq[edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry]]).getOrElse(_root_.scala.Seq.empty).iterator.map(edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore._typemapper_operatorInfo.toCustom(_)).toMap - ) - case _ => throw new RuntimeException("Expected PMessage") - } - def javaDescriptor: _root_.com.google.protobuf.Descriptors.Descriptor = WorkflowresultstateProto.javaDescriptor.getMessageTypes().get(0) - def scalaDescriptor: _root_.scalapb.descriptors.Descriptor = WorkflowresultstateProto.scalaDescriptor.messages(0) - def messageCompanionForFieldNumber(__number: _root_.scala.Int): _root_.scalapb.GeneratedMessageCompanion[_] = { - var __out: _root_.scalapb.GeneratedMessageCompanion[_] = null - (__number: @_root_.scala.unchecked) match { - case 1 => __out = edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry - } - __out - } - lazy val nestedMessagesCompanions: Seq[_root_.scalapb.GeneratedMessageCompanion[_ <: _root_.scalapb.GeneratedMessage]] = - Seq[_root_.scalapb.GeneratedMessageCompanion[_ <: _root_.scalapb.GeneratedMessage]]( - _root_.edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry - ) - def enumCompanionForFieldNumber(__fieldNumber: _root_.scala.Int): _root_.scalapb.GeneratedEnumCompanion[_] = throw new MatchError(__fieldNumber) - lazy val defaultInstance = edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore( - operatorInfo = _root_.scala.collection.immutable.Map.empty - ) - @SerialVersionUID(0L) - final case class OperatorInfoEntry( - key: _root_.scala.Predef.String = "", - value: _root_.scala.Option[edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata] = _root_.scala.None - ) extends scalapb.GeneratedMessage with scalapb.lenses.Updatable[OperatorInfoEntry] { - @transient - private[this] var __serializedSizeCachedValue: _root_.scala.Int = 0 - private[this] def __computeSerializedValue(): _root_.scala.Int = { - var 
__size = 0 - - { - val __value = key - if (!__value.isEmpty) { - __size += _root_.com.google.protobuf.CodedOutputStream.computeStringSize(1, __value) - } - }; - if (value.isDefined) { - val __value = value.get - __size += 1 + _root_.com.google.protobuf.CodedOutputStream.computeUInt32SizeNoTag(__value.serializedSize) + __value.serializedSize - }; - __size - } - override def serializedSize: _root_.scala.Int = { - var read = __serializedSizeCachedValue - if (read == 0) { - read = __computeSerializedValue() - __serializedSizeCachedValue = read - } - read - } - def writeTo(`_output__`: _root_.com.google.protobuf.CodedOutputStream): _root_.scala.Unit = { - { - val __v = key - if (!__v.isEmpty) { - _output__.writeString(1, __v) - } - }; - value.foreach { __v => - val __m = __v - _output__.writeTag(2, 2) - _output__.writeUInt32NoTag(__m.serializedSize) - __m.writeTo(_output__) - }; - } - def withKey(__v: _root_.scala.Predef.String): OperatorInfoEntry = copy(key = __v) - def getValue: edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata = value.getOrElse(edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata.defaultInstance) - def clearValue: OperatorInfoEntry = copy(value = _root_.scala.None) - def withValue(__v: edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata): OperatorInfoEntry = copy(value = Option(__v)) - def getFieldByNumber(__fieldNumber: _root_.scala.Int): _root_.scala.Any = { - (__fieldNumber: @_root_.scala.unchecked) match { - case 1 => { - val __t = key - if (__t != "") __t else null - } - case 2 => value.orNull - } - } - def getField(__field: _root_.scalapb.descriptors.FieldDescriptor): _root_.scalapb.descriptors.PValue = { - _root_.scala.Predef.require(__field.containingMessage eq companion.scalaDescriptor) - (__field.number: @_root_.scala.unchecked) match { - case 1 => _root_.scalapb.descriptors.PString(key) - case 2 => value.map(_.toPMessage).getOrElse(_root_.scalapb.descriptors.PEmpty) - } - } - def toProtoString: _root_.scala.Predef.String = _root_.scalapb.TextFormat.printToSingleLineUnicodeString(this) - def companion = edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry - // @@protoc_insertion_point(GeneratedMessage[edu.uci.ics.texera.web.WorkflowResultStore.OperatorInfoEntry]) - } - - object OperatorInfoEntry extends scalapb.GeneratedMessageCompanion[edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry] { - implicit def messageCompanion: scalapb.GeneratedMessageCompanion[edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry] = this - def parseFrom(`_input__`: _root_.com.google.protobuf.CodedInputStream): edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry = { - var __key: _root_.scala.Predef.String = "" - var __value: _root_.scala.Option[edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata] = _root_.scala.None - var _done__ = false - while (!_done__) { - val _tag__ = _input__.readTag() - _tag__ match { - case 0 => _done__ = true - case 10 => - __key = _input__.readStringRequireUtf8() - case 18 => - __value = Option(__value.fold(_root_.scalapb.LiteParser.readMessage[edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata](_input__))(_root_.scalapb.LiteParser.readMessage(_input__, _))) - case tag => _input__.skipField(tag) - } - } - edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry( - key = __key, - value = __value - ) - } - implicit def messageReads: 
_root_.scalapb.descriptors.Reads[edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry] = _root_.scalapb.descriptors.Reads{ - case _root_.scalapb.descriptors.PMessage(__fieldsMap) => - _root_.scala.Predef.require(__fieldsMap.keys.forall(_.containingMessage eq scalaDescriptor), "FieldDescriptor does not match message type.") - edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry( - key = __fieldsMap.get(scalaDescriptor.findFieldByNumber(1).get).map(_.as[_root_.scala.Predef.String]).getOrElse(""), - value = __fieldsMap.get(scalaDescriptor.findFieldByNumber(2).get).flatMap(_.as[_root_.scala.Option[edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata]]) - ) - case _ => throw new RuntimeException("Expected PMessage") - } - def javaDescriptor: _root_.com.google.protobuf.Descriptors.Descriptor = edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.javaDescriptor.getNestedTypes().get(0) - def scalaDescriptor: _root_.scalapb.descriptors.Descriptor = edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.scalaDescriptor.nestedMessages(0) - def messageCompanionForFieldNumber(__number: _root_.scala.Int): _root_.scalapb.GeneratedMessageCompanion[_] = { - var __out: _root_.scalapb.GeneratedMessageCompanion[_] = null - (__number: @_root_.scala.unchecked) match { - case 2 => __out = edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata - } - __out - } - lazy val nestedMessagesCompanions: Seq[_root_.scalapb.GeneratedMessageCompanion[_ <: _root_.scalapb.GeneratedMessage]] = Seq.empty - def enumCompanionForFieldNumber(__fieldNumber: _root_.scala.Int): _root_.scalapb.GeneratedEnumCompanion[_] = throw new MatchError(__fieldNumber) - lazy val defaultInstance = edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry( - key = "", - value = _root_.scala.None - ) - implicit class OperatorInfoEntryLens[UpperPB](_l: _root_.scalapb.lenses.Lens[UpperPB, edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry]) extends _root_.scalapb.lenses.ObjectLens[UpperPB, edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry](_l) { - def key: _root_.scalapb.lenses.Lens[UpperPB, _root_.scala.Predef.String] = field(_.key)((c_, f_) => c_.copy(key = f_)) - def value: _root_.scalapb.lenses.Lens[UpperPB, edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata] = field(_.getValue)((c_, f_) => c_.copy(value = Option(f_))) - def optionalValue: _root_.scalapb.lenses.Lens[UpperPB, _root_.scala.Option[edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata]] = field(_.value)((c_, f_) => c_.copy(value = f_)) - } - final val KEY_FIELD_NUMBER = 1 - final val VALUE_FIELD_NUMBER = 2 - @transient - implicit val keyValueMapper: _root_.scalapb.TypeMapper[edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry, (_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata)] = - _root_.scalapb.TypeMapper[edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry, (_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata)](__m => (__m.key, __m.getValue))(__p => edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry(__p._1, Some(__p._2))) - def of( - key: _root_.scala.Predef.String, - value: _root_.scala.Option[edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata] - ): 
_root_.edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry = _root_.edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry( - key, - value - ) - // @@protoc_insertion_point(GeneratedMessageCompanion[edu.uci.ics.texera.web.WorkflowResultStore.OperatorInfoEntry]) - } - - implicit class WorkflowResultStoreLens[UpperPB](_l: _root_.scalapb.lenses.Lens[UpperPB, edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore]) extends _root_.scalapb.lenses.ObjectLens[UpperPB, edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore](_l) { - def operatorInfo: _root_.scalapb.lenses.Lens[UpperPB, _root_.scala.collection.immutable.Map[_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata]] = field(_.operatorInfo)((c_, f_) => c_.copy(operatorInfo = f_)) - } - final val OPERATOR_INFO_FIELD_NUMBER = 1 - @transient - private[workflowresultstate] val _typemapper_operatorInfo: _root_.scalapb.TypeMapper[edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry, (_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata)] = implicitly[_root_.scalapb.TypeMapper[edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore.OperatorInfoEntry, (_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata)]] - def of( - operatorInfo: _root_.scala.collection.immutable.Map[_root_.scala.Predef.String, edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata] - ): _root_.edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore = _root_.edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore( - operatorInfo - ) - // @@protoc_insertion_point(GeneratedMessageCompanion[edu.uci.ics.texera.web.WorkflowResultStore]) -} diff --git a/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowresultstate/WorkflowresultstateProto.scala b/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowresultstate/WorkflowresultstateProto.scala deleted file mode 100644 index 3cdcca1f03d..00000000000 --- a/core/amber/src/main/scalapb/edu/uci/ics/texera/web/workflowresultstate/WorkflowresultstateProto.scala +++ /dev/null @@ -1,40 +0,0 @@ -// Generated by the Scala Plugin for the Protocol Buffer Compiler. -// Do not edit! 
-// -// Protofile syntax: PROTO3 - -package edu.uci.ics.texera.web.workflowresultstate - -object WorkflowresultstateProto extends _root_.scalapb.GeneratedFileObject { - lazy val dependencies: Seq[_root_.scalapb.GeneratedFileObject] = Seq( - scalapb.options.ScalapbProto - ) - lazy val messagesCompanions: Seq[_root_.scalapb.GeneratedMessageCompanion[_ <: _root_.scalapb.GeneratedMessage]] = - Seq[_root_.scalapb.GeneratedMessageCompanion[_ <: _root_.scalapb.GeneratedMessage]]( - edu.uci.ics.texera.web.workflowresultstate.WorkflowResultStore, - edu.uci.ics.texera.web.workflowresultstate.OperatorResultMetadata - ) - private lazy val ProtoBytes: _root_.scala.Array[Byte] = - scalapb.Encoding.fromBase64(scala.collection.immutable.Seq( - """CixlZHUvdWNpL2ljcy90ZXhlcmEvd29ya2Zsb3dyZXN1bHRzdGF0ZS5wcm90bxIWZWR1LnVjaS5pY3MudGV4ZXJhLndlYhoVc - 2NhbGFwYi9zY2FsYXBiLnByb3RvIpQCChNXb3JrZmxvd1Jlc3VsdFN0b3JlEnUKDW9wZXJhdG9yX2luZm8YASADKAsyPS5lZHUud - WNpLmljcy50ZXhlcmEud2ViLldvcmtmbG93UmVzdWx0U3RvcmUuT3BlcmF0b3JJbmZvRW50cnlCEeI/DhIMb3BlcmF0b3JJbmZvU - gxvcGVyYXRvckluZm8ahQEKEU9wZXJhdG9ySW5mb0VudHJ5EhoKA2tleRgBIAEoCUII4j8FEgNrZXlSA2tleRJQCgV2YWx1ZRgCI - AEoCzIuLmVkdS51Y2kuaWNzLnRleGVyYS53ZWIuT3BlcmF0b3JSZXN1bHRNZXRhZGF0YUIK4j8HEgV2YWx1ZVIFdmFsdWU6AjgBI - ogBChZPcGVyYXRvclJlc3VsdE1ldGFkYXRhEjAKC3R1cGxlX2NvdW50GAEgASgFQg/iPwwSCnR1cGxlQ291bnRSCnR1cGxlQ291b - nQSPAoPY2hhbmdlX2RldGVjdG9yGAIgASgJQhPiPxASDmNoYW5nZURldGVjdG9yUg5jaGFuZ2VEZXRlY3RvckIJ4j8GSABYAHgAY - gZwcm90bzM=""" - ).mkString) - lazy val scalaDescriptor: _root_.scalapb.descriptors.FileDescriptor = { - val scalaProto = com.google.protobuf.descriptor.FileDescriptorProto.parseFrom(ProtoBytes) - _root_.scalapb.descriptors.FileDescriptor.buildFrom(scalaProto, dependencies.map(_.scalaDescriptor)) - } - lazy val javaDescriptor: com.google.protobuf.Descriptors.FileDescriptor = { - val javaProto = com.google.protobuf.DescriptorProtos.FileDescriptorProto.parseFrom(ProtoBytes) - com.google.protobuf.Descriptors.FileDescriptor.buildFrom(javaProto, _root_.scala.Array( - scalapb.options.ScalapbProto.javaDescriptor - )) - } - @deprecated("Use javaDescriptor instead. 
In a future version this will refer to scalaDescriptor.", "ScalaPB 0.5.47") - def descriptor: com.google.protobuf.Descriptors.FileDescriptor = javaDescriptor -} \ No newline at end of file From 6d876ed9f83746c7fed597409c51345e445c5db6 Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Thu, 6 Jul 2023 10:02:22 -0700 Subject: [PATCH 06/18] wip --- .../web/model/websocket/event/WebResultUpdateEvent.scala | 2 +- .../websocket/event/WorkflowAvailableResultEvent.scala | 2 +- .../{WorkflowResultService.scala => JobResultService.scala} | 6 +++--- .../ics/texera/web/service/ProgressiveResultService.scala | 4 ++-- .../edu/uci/ics/texera/web/service/WorkflowJobService.scala | 2 +- .../edu/uci/ics/texera/web/service/WorkflowService.scala | 4 ++-- 6 files changed, 10 insertions(+), 10 deletions(-) rename core/amber/src/main/scala/edu/uci/ics/texera/web/service/{WorkflowResultService.scala => JobResultService.scala} (98%) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WebResultUpdateEvent.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WebResultUpdateEvent.scala index 68ee72b2a93..d30390978b0 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WebResultUpdateEvent.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WebResultUpdateEvent.scala @@ -1,5 +1,5 @@ package edu.uci.ics.texera.web.model.websocket.event -import edu.uci.ics.texera.web.service.WorkflowResultService.WebResultUpdate +import edu.uci.ics.texera.web.service.JobResultService.WebResultUpdate case class WebResultUpdateEvent(updates: Map[String, WebResultUpdate]) extends TexeraWebSocketEvent diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WorkflowAvailableResultEvent.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WorkflowAvailableResultEvent.scala index 91b7f11e0d0..d57bbe46403 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WorkflowAvailableResultEvent.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WorkflowAvailableResultEvent.scala @@ -1,7 +1,7 @@ package edu.uci.ics.texera.web.model.websocket.event import edu.uci.ics.texera.web.model.websocket.event.WorkflowAvailableResultEvent.OperatorAvailableResult -import edu.uci.ics.texera.web.service.WorkflowResultService.WebOutputMode +import edu.uci.ics.texera.web.service.JobResultService.WebOutputMode object WorkflowAvailableResultEvent { case class OperatorAvailableResult( diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowResultService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala similarity index 98% rename from core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowResultService.scala rename to core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala index 2d7b25840d7..f0869515807 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowResultService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala @@ -10,7 +10,7 @@ import edu.uci.ics.amber.engine.common.client.AmberClient import edu.uci.ics.amber.engine.common.tuple.ITuple import edu.uci.ics.texera.web.model.websocket.event.{PaginatedResultEvent, TexeraWebSocketEvent, WebResultUpdateEvent} import edu.uci.ics.texera.web.model.websocket.request.ResultPaginationRequest -import 
edu.uci.ics.texera.web.service.WorkflowResultService.WebResultUpdate +import edu.uci.ics.texera.web.service.JobResultService.WebResultUpdate import edu.uci.ics.texera.web.storage.{JobStateStore, OperatorResultMetadata, WorkflowResultStore, WorkflowStateStore} import edu.uci.ics.texera.web.workflowruntimestate.JobMetadataStore import edu.uci.ics.texera.web.workflowruntimestate.WorkflowAggregatedState.RUNNING @@ -26,7 +26,7 @@ import java.util.UUID import scala.collection.mutable import scala.concurrent.duration.DurationInt -object WorkflowResultService { +object JobResultService { val defaultPageSize: Int = 5 @@ -113,7 +113,7 @@ object WorkflowResultService { * - update the result data for each operator, * - send result update event to the frontend */ -class WorkflowResultService( +class JobResultService( val opResultStorage: OpResultStorage, val workflowStateStore: WorkflowStateStore ) extends SubscriptionManager { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ProgressiveResultService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ProgressiveResultService.scala index 7bb91576e1a..eb17d78e295 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ProgressiveResultService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ProgressiveResultService.scala @@ -1,6 +1,6 @@ package edu.uci.ics.texera.web.service -import edu.uci.ics.texera.web.service.WorkflowResultService._ +import edu.uci.ics.texera.web.service.JobResultService._ import edu.uci.ics.texera.workflow.common.IncrementalOutputMode.{SET_DELTA, SET_SNAPSHOT} import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc @@ -35,7 +35,7 @@ class ProgressiveResultService( val webUpdate = (webOutputMode, sink.getOutputMode) match { case (PaginationMode(), SET_SNAPSHOT) => val numTuples = storage.getCount - val maxPageIndex = Math.ceil(numTuples / WorkflowResultService.defaultPageSize.toDouble).toInt + val maxPageIndex = Math.ceil(numTuples / JobResultService.defaultPageSize.toDouble).toInt WebPaginationUpdate( PaginationMode(), newTupleCount, diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala index 84b497c76f7..2afcf79d0ca 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala @@ -22,7 +22,7 @@ import edu.uci.ics.texera.workflow.operators.udf.python.{ class WorkflowJobService( workflowContext: WorkflowContext, wsInput: WebsocketInput, - resultService: WorkflowResultService, + resultService: JobResultService, request: WorkflowExecuteRequest, errorHandler: Throwable => Unit, engineVersion: String diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowService.scala index eeaeb626d58..be227fbda9d 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowService.scala @@ -68,8 +68,8 @@ class WorkflowService( } val wsInput = new WebsocketInput(errorHandler) val stateStore = new WorkflowStateStore() - val resultService: WorkflowResultService = - new WorkflowResultService(opResultStorage, stateStore) + val resultService: JobResultService = + new JobResultService(opResultStorage, stateStore) val 
exportService: ResultExportService = new ResultExportService(opResultStorage, UInteger.valueOf(wId)) val operatorCache: WorkflowCacheService = From 20895388f46ffcc844f90f6ab8520bb443d0738a Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Fri, 7 Jul 2023 04:14:57 -0700 Subject: [PATCH 07/18] complete --- .../event/WebResultUpdateEvent.scala | 2 +- .../event/WorkflowAvailableResultEvent.scala | 2 +- .../request/CacheStatusUpdateRequest.scala | 12 +- .../request/WorkflowExecuteRequest.scala | 3 +- .../resource/WorkflowWebsocketResource.scala | 20 ++- .../texera/web/service/JobResultService.scala | 123 +++++++++++------- .../service/ProgressiveResultService.scala | 60 --------- .../web/service/WorkflowCacheChecker.scala | 26 ++-- .../web/service/WorkflowCacheService.scala | 88 ------------- .../web/service/WorkflowJobService.scala | 12 +- .../texera/web/service/WorkflowService.scala | 29 +++-- .../common/operators/OperatorDescriptor.scala | 2 +- .../common/workflow/LogicalPlan.scala | 48 +++---- .../workflow/SinkInjectionTransformer.scala | 27 ++-- .../workflow/WorkflowCacheRewriter.scala | 39 +++--- .../common/workflow/WorkflowCompiler.scala | 51 +++++++- .../common/workflow/WorkflowRewriter.scala | 10 +- .../source/cache/CacheSourceOpDesc.scala | 2 +- .../source/scan/ScanSourceOpDesc.scala | 10 +- .../workflow/WorkflowRewriterSpec.scala | 27 ++-- 20 files changed, 264 insertions(+), 329 deletions(-) delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/web/service/ProgressiveResultService.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheService.scala diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WebResultUpdateEvent.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WebResultUpdateEvent.scala index d30390978b0..6791eb1dc4f 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WebResultUpdateEvent.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WebResultUpdateEvent.scala @@ -1,5 +1,5 @@ package edu.uci.ics.texera.web.model.websocket.event -import edu.uci.ics.texera.web.service.JobResultService.WebResultUpdate +import edu.uci.ics.texera.web.service.WebResultUpdate.WebResultUpdate case class WebResultUpdateEvent(updates: Map[String, WebResultUpdate]) extends TexeraWebSocketEvent diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WorkflowAvailableResultEvent.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WorkflowAvailableResultEvent.scala index d57bbe46403..9f71efa134b 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WorkflowAvailableResultEvent.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WorkflowAvailableResultEvent.scala @@ -1,7 +1,7 @@ package edu.uci.ics.texera.web.model.websocket.event import edu.uci.ics.texera.web.model.websocket.event.WorkflowAvailableResultEvent.OperatorAvailableResult -import edu.uci.ics.texera.web.service.JobResultService.WebOutputMode +import edu.uci.ics.texera.web.service.WebResultUpdate.WebOutputMode object WorkflowAvailableResultEvent { case class OperatorAvailableResult( diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/CacheStatusUpdateRequest.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/CacheStatusUpdateRequest.scala index b207086c31f..f879002c0c4 100644 --- 
a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/CacheStatusUpdateRequest.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/CacheStatusUpdateRequest.scala @@ -7,5 +7,13 @@ case class CacheStatusUpdateRequest( operators: List[OperatorDescriptor], links: List[OperatorLink], breakpoints: List[BreakpointInfo], - cachedOperatorIds: List[String] -) extends TexeraWebSocketRequest + opsToViewResult: List[String], + opsToReuseResult: List[String] +) extends TexeraWebSocketRequest { + + def toLogicalPlanPojo() = { + LogicalPlanPojo(operators, links, breakpoints, opsToViewResult, opsToReuseResult) + } +} + + diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/WorkflowExecuteRequest.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/WorkflowExecuteRequest.scala index 34775f4ef8f..f994f492e67 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/WorkflowExecuteRequest.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/WorkflowExecuteRequest.scala @@ -13,5 +13,6 @@ case class LogicalPlanPojo( operators: List[OperatorDescriptor], links: List[OperatorLink], breakpoints: List[BreakpointInfo], - var cachedOperatorIds: List[String] + opsToViewResult: List[String], + opsToReuseResult: List[String] ) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/WorkflowWebsocketResource.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/WorkflowWebsocketResource.scala index 14399f1ff47..48172a52ed2 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/WorkflowWebsocketResource.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/WorkflowWebsocketResource.scala @@ -5,14 +5,11 @@ import com.typesafe.scalalogging.LazyLogging import edu.uci.ics.texera.Utils import edu.uci.ics.texera.web.{ServletAwareConfigurator, SessionState} import edu.uci.ics.texera.web.model.jooq.generated.tables.pojos.User -import edu.uci.ics.texera.web.model.websocket.event.{ - TexeraWebSocketEvent, - WorkflowErrorEvent, - WorkflowStateEvent -} +import edu.uci.ics.texera.web.model.websocket.event.{CacheStatusUpdateEvent, TexeraWebSocketEvent, WorkflowErrorEvent, WorkflowStateEvent} import edu.uci.ics.texera.web.model.websocket.request._ import edu.uci.ics.texera.web.model.websocket.response._ -import edu.uci.ics.texera.web.service.WorkflowService +import edu.uci.ics.texera.web.service.{WorkflowCacheChecker, WorkflowService} +import edu.uci.ics.texera.workflow.common.workflow.LogicalPlan import edu.uci.ics.texera.workflow.common.workflow.WorkflowCompiler.ConstraintViolationException import javax.websocket._ @@ -79,6 +76,17 @@ class WorkflowWebsocketResource extends LazyLogging { jobService.jobReconfigurationService.modifyOperatorLogic(modifyLogicRequest) send(session, modifyLogicResponse) } + case cacheStatusUpdateRequest: CacheStatusUpdateRequest => + if (workflowStateOpt.isDefined) { + val oldPlan = workflowStateOpt.get.lastCompletedLogicalPlan + if (oldPlan != null) { + val newPlan = LogicalPlan.apply(cacheStatusUpdateRequest.toLogicalPlanPojo()) + val validCacheOps = new WorkflowCacheChecker(oldPlan, newPlan).getValidCacheReuse() + val cacheUpdateResult = cacheStatusUpdateRequest.opsToReuseResult.map(o => + (o, if (validCacheOps.contains(o)) "cache valid" else "cache invalid")).toMap + send(session, CacheStatusUpdateEvent(cacheUpdateResult)) + } + } case other => workflowStateOpt match { case 
Some(workflow) => workflow.wsInput.onNext(other, uidOpt) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala index f0869515807..4475876f515 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala @@ -10,31 +10,28 @@ import edu.uci.ics.amber.engine.common.client.AmberClient import edu.uci.ics.amber.engine.common.tuple.ITuple import edu.uci.ics.texera.web.model.websocket.event.{PaginatedResultEvent, TexeraWebSocketEvent, WebResultUpdateEvent} import edu.uci.ics.texera.web.model.websocket.request.ResultPaginationRequest -import edu.uci.ics.texera.web.service.JobResultService.WebResultUpdate +import edu.uci.ics.texera.web.service.WebResultUpdate.{WebResultUpdate, convertWebResultUpdate} import edu.uci.ics.texera.web.storage.{JobStateStore, OperatorResultMetadata, WorkflowResultStore, WorkflowStateStore} import edu.uci.ics.texera.web.workflowruntimestate.JobMetadataStore import edu.uci.ics.texera.web.workflowruntimestate.WorkflowAggregatedState.RUNNING import edu.uci.ics.texera.web.{SubscriptionManager, TexeraWebApplication} import edu.uci.ics.texera.workflow.common.IncrementalOutputMode +import edu.uci.ics.texera.workflow.common.IncrementalOutputMode.{SET_DELTA, SET_SNAPSHOT} import edu.uci.ics.texera.workflow.common.storage.OpResultStorage import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.workflow.LogicalPlan import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc -import edu.uci.ics.texera.workflow.operators.source.cache.CacheSourceOpDesc import java.util.UUID import scala.collection.mutable import scala.concurrent.duration.DurationInt -object JobResultService { - - val defaultPageSize: Int = 5 - +object WebResultUpdate { // convert Tuple from engine's format to JSON format def webDataFromTuple( - mode: WebOutputMode, - table: List[ITuple], - chartType: Option[String] + mode: WebOutputMode, + table: List[ITuple], + chartType: Option[String] ): WebDataUpdate = { val tableInJson = table.map(t => t.asInstanceOf[Tuple].asKeyValuePairJson()) WebDataUpdate(mode, tableInJson, chartType) @@ -48,9 +45,9 @@ object JobResultService { * @return list of indices of modified pages, index starts from 1 */ def calculateDirtyPageIndices( - beforeSnapshot: List[ITuple], - afterSnapshot: List[ITuple], - pageSize: Int + beforeSnapshot: List[ITuple], + afterSnapshot: List[ITuple], + pageSize: Int ): List[Int] = { var currentIndex = 1 var currentIndexPageCount = 0 @@ -97,13 +94,66 @@ object JobResultService { final case class SetDeltaMode() extends WebOutputMode case class WebPaginationUpdate( - mode: PaginationMode, - totalNumTuples: Long, - dirtyPageIndices: List[Int] + mode: PaginationMode, + totalNumTuples: Long, + dirtyPageIndices: List[Int] ) extends WebResultUpdate case class WebDataUpdate(mode: WebOutputMode, table: List[ObjectNode], chartType: Option[String]) - extends WebResultUpdate + extends WebResultUpdate + + + /** + * For SET_SNAPSHOT output mode: result is the latest snapshot + * FOR SET_DELTA output mode: + * - for insert-only delta: effectively the same as latest snapshot + * - for insert-retract delta: the union of all delta outputs, not compacted to a snapshot + * + * Produces the WebResultUpdate to send to frontend from a result update from the engine. 
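+   *
+   * Illustrative example (editor's sketch, not part of the original patch): a non-visualization
+   * sink in SET_SNAPSHOT mode whose storage currently holds 12 tuples would, with the default
+   * page size of 5, yield
+   *   WebPaginationUpdate(PaginationMode(), newTupleCount, List(1, 2, 3))
+   * because ceil(12 / 5) = 3 pages are reported as dirty.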
+ */ + def convertWebResultUpdate(sink: ProgressiveSinkOpDesc, + oldTupleCount: Int, newTupleCount: Int): WebResultUpdate = { + val webOutputMode: WebOutputMode = { + (sink.getOutputMode, sink.getChartType) match { + // visualization sinks use its corresponding mode + case (SET_SNAPSHOT, Some(_)) => SetSnapshotMode() + case (SET_DELTA, Some(_)) => SetDeltaMode() + // Non-visualization sinks use pagination mode + case (_, None) => PaginationMode() + } + } + + val storage = sink.getStorage + val webUpdate = (webOutputMode, sink.getOutputMode) match { + case (PaginationMode(), SET_SNAPSHOT) => + val numTuples = storage.getCount + val maxPageIndex = Math.ceil(numTuples / JobResultService.defaultPageSize.toDouble).toInt + WebPaginationUpdate( + PaginationMode(), + newTupleCount, + (1 to maxPageIndex).toList + ) + case (SetSnapshotMode(), SET_SNAPSHOT) => + webDataFromTuple(webOutputMode, storage.getAll.toList, sink.getChartType) + case (SetDeltaMode(), SET_DELTA) => + val deltaList = storage.getAllAfter(oldTupleCount).toList + webDataFromTuple(webOutputMode, deltaList, sink.getChartType) + + // currently not supported mode combinations + // (PaginationMode, SET_DELTA) | (DataSnapshotMode, SET_DELTA) | (DataDeltaMode, SET_SNAPSHOT) + case _ => + throw new RuntimeException( + "update mode combination not supported: " + (webOutputMode, sink.getOutputMode) + ) + } + webUpdate + } +} + +object JobResultService { + + val defaultPageSize: Int = 5 + } /** @@ -118,8 +168,8 @@ class JobResultService( val workflowStateStore: WorkflowStateStore ) extends SubscriptionManager { - var progressiveResults: mutable.HashMap[String, ProgressiveResultService] = - mutable.HashMap[String, ProgressiveResultService]() + var sinkOperators: mutable.HashMap[String, ProgressiveSinkOpDesc] = + mutable.HashMap[String, ProgressiveSinkOpDesc]() private val resultPullingFrequency = AmberUtils.amberConfig.getInt("web-server.workflow-result-pulling-in-seconds") private var resultUpdateCancellable: Cancellable = _ @@ -180,42 +230,25 @@ class JobResultService( case (opId, info) => val oldInfo = oldState.resultInfo.getOrElse(opId, OperatorResultMetadata()) buf(opId) = - progressiveResults(opId).convertWebResultUpdate(oldInfo.tupleCount, info.tupleCount) + convertWebResultUpdate(sinkOperators(opId), oldInfo.tupleCount, info.tupleCount) } Iterable(WebResultUpdateEvent(buf.toMap)) }) ) // first clear all the results - progressiveResults.clear() + sinkOperators.clear() workflowStateStore.resultStore.updateState { _ => WorkflowResultStore() // empty result store } - // If we have cache sources, make dummy sink operators for displaying results on the frontend. - logicalPlan.getSourceOperators.map(source => { - logicalPlan.getOperator(source) match { - case cacheSourceOpDesc: CacheSourceOpDesc => - val dummySink = new ProgressiveSinkOpDesc() - dummySink.setStorage(opResultStorage.get(cacheSourceOpDesc.targetSinkStorageId)) - progressiveResults += ( - ( - cacheSourceOpDesc.targetSinkStorageId, - new ProgressiveResultService(dummySink) - ) - ) - case other => //skip - } - }) - // For operators connected to a sink and sinks, // create result service so that the results can be displayed. 
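    // Editor's note (illustrative, not part of the original patch): each ProgressiveSinkOpDesc below
    // is registered under two keys, the upstream operator ID (which also serves as the storage key)
    // and the sink's own operator ID, so a ResultPaginationRequest carrying either ID resolves to
    // the same sink and its storage.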
logicalPlan.getTerminalOperators.map(sink => { logicalPlan.getOperator(sink) match { case sinkOp: ProgressiveSinkOpDesc => - val service = new ProgressiveResultService(sinkOp) - progressiveResults += ((sinkOp.getUpstreamId.get, service)) - progressiveResults += ((sink, service)) + sinkOperators += ((sinkOp.getUpstreamId.get, sinkOp)) + sinkOperators += ((sink, sinkOp)) case other => // skip other non-texera-managed sinks, if any } }) @@ -226,8 +259,8 @@ class JobResultService( val from = request.pageSize * (request.pageIndex - 1) val opId = request.operatorID val paginationIterable = - if (opResultStorage.contains(opId)) { - opResultStorage.get(opId).getRange(from, from + request.pageSize) + if (sinkOperators.contains(opId)) { + sinkOperators(opId).getStorage.getRange(from, from + request.pageSize) } else { Iterable.empty } @@ -239,10 +272,10 @@ class JobResultService( def onResultUpdate(): Unit = { workflowStateStore.resultStore.updateState { _ => - val newInfo: Map[String, OperatorResultMetadata] = progressiveResults.map { - case (id, service) => - val count = service.sink.getStorage.getCount.toInt - val mode = service.sink.getOutputMode + val newInfo: Map[String, OperatorResultMetadata] = sinkOperators.map { + case (id, sink) => + val count = sink.getStorage.getCount.toInt + val mode = sink.getOutputMode val changeDetector = if (mode == IncrementalOutputMode.SET_SNAPSHOT) { UUID.randomUUID.toString diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ProgressiveResultService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ProgressiveResultService.scala deleted file mode 100644 index eb17d78e295..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ProgressiveResultService.scala +++ /dev/null @@ -1,60 +0,0 @@ -package edu.uci.ics.texera.web.service - -import edu.uci.ics.texera.web.service.JobResultService._ -import edu.uci.ics.texera.workflow.common.IncrementalOutputMode.{SET_DELTA, SET_SNAPSHOT} -import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc - -/** - * OperatorResultService manages the materialized result of an operator. - * It always keeps the latest snapshot of the computation result. - */ -class ProgressiveResultService( - val sink: ProgressiveSinkOpDesc -) { - - /** - * For SET_SNAPSHOT output mode: result is the latest snapshot - * FOR SET_DELTA output mode: - * - for insert-only delta: effectively the same as latest snapshot - * - for insert-retract delta: the union of all delta outputs, not compacted to a snapshot - * - * Produces the WebResultUpdate to send to frontend from a result update from the engine. 
- */ - def convertWebResultUpdate(oldTupleCount: Int, newTupleCount: Int): WebResultUpdate = { - val webOutputMode: WebOutputMode = { - (sink.getOutputMode, sink.getChartType) match { - // visualization sinks use its corresponding mode - case (SET_SNAPSHOT, Some(_)) => SetSnapshotMode() - case (SET_DELTA, Some(_)) => SetDeltaMode() - // Non-visualization sinks use pagination mode - case (_, None) => PaginationMode() - } - } - - val storage = sink.getStorage - val webUpdate = (webOutputMode, sink.getOutputMode) match { - case (PaginationMode(), SET_SNAPSHOT) => - val numTuples = storage.getCount - val maxPageIndex = Math.ceil(numTuples / JobResultService.defaultPageSize.toDouble).toInt - WebPaginationUpdate( - PaginationMode(), - newTupleCount, - (1 to maxPageIndex).toList - ) - case (SetSnapshotMode(), SET_SNAPSHOT) => - webDataFromTuple(webOutputMode, storage.getAll.toList, sink.getChartType) - case (SetDeltaMode(), SET_DELTA) => - val deltaList = storage.getAllAfter(oldTupleCount).toList - webDataFromTuple(webOutputMode, deltaList, sink.getChartType) - - // currently not supported mode combinations - // (PaginationMode, SET_DELTA) | (DataSnapshotMode, SET_DELTA) | (DataDeltaMode, SET_SNAPSHOT) - case _ => - throw new RuntimeException( - "update mode combination not supported: " + (webOutputMode, sink.getOutputMode) - ) - } - webUpdate - } - -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheChecker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheChecker.scala index 90608ae033f..3529392ba36 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheChecker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheChecker.scala @@ -6,17 +6,21 @@ import scala.collection.mutable class WorkflowCacheChecker(oldWorkflow: LogicalPlan, newWorkflow: LogicalPlan) { - val equivalenceClass = new mutable.HashMap[String, Int]() - var nextClassId: Int = 0 + private val equivalenceClass = new mutable.HashMap[String, Int]() + private var nextClassId: Int = 0 - def getNextClassId(): Int = { + private def getNextClassId(): Int = { nextClassId += 1 nextClassId } // checks the validity of the cache given the old plan and the new plan - // returns a map: - def checkCacheValidity(): Map[String, String] = { + // returns a set of operator IDs that can be reused + // the operatorID is also the storage key + def getValidCacheReuse(): Set[String] = { + if (oldWorkflow == null) { + return Set() + } // for each operator in the old workflow, add it to its own equivalence class oldWorkflow.jgraphtDag.iterator().forEachRemaining(opId => { @@ -39,7 +43,7 @@ class WorkflowCacheChecker(oldWorkflow: LogicalPlan, newWorkflow: LogicalPlan) { val oldOp = oldWorkflow.operators.find(op => op.equals(newOp)).orNull // check if the old workflow contains the same operator content - val newOpClassId = if (oldOp == null) { + val newOpClassId = if (oldOp == null) { getNextClassId() // operator not found, create a new class } else{ // check its inputs are all in the same equivalence class @@ -55,17 +59,19 @@ class WorkflowCacheChecker(oldWorkflow: LogicalPlan, newWorkflow: LogicalPlan) { equivalenceClass.put("new-" + opId, newOpClassId) }) + // for each cached operator in the old workflow, // check if it can be still used in the new workflow - oldWorkflow.cachedOperatorIds.map(cachedOpId => { - val oldCachedOpId = "old-" + cachedOpId + oldWorkflow.terminalOperators.map(sinkOpId => { + val opId = 
oldWorkflow.getUpstream(sinkOpId).head.operatorID + val oldCachedOpId = "old-" + opId // find its equivalence class val oldClassId = equivalenceClass(oldCachedOpId) // find the corresponding operator that can still use this cache val newOpId = equivalenceClass.find(p => p._2 == oldClassId && p._1 != oldCachedOpId) .map(p => p._1).orNull - (newOpId, cachedOpId) - }).filter(p => p._1 != null && p._2 != null).toMap + if (newOpId == null) null else opId + }).filter(o => o != null).toSet } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheService.scala deleted file mode 100644 index b9ef3e047ea..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheService.scala +++ /dev/null @@ -1,88 +0,0 @@ -package edu.uci.ics.texera.web.service - -import com.typesafe.scalalogging.LazyLogging -import edu.uci.ics.amber.engine.common.AmberUtils -import edu.uci.ics.texera.web.model.websocket.event.CacheStatusUpdateEvent -import edu.uci.ics.texera.web.{SubscriptionManager, WebsocketInput} -import edu.uci.ics.texera.web.model.websocket.request.CacheStatusUpdateRequest -import edu.uci.ics.texera.web.storage.WorkflowStateStore -//import edu.uci.ics.texera.web.workflowcachestate.CacheState.{INVALID, VALID} -import edu.uci.ics.texera.workflow.common.operators.OperatorDescriptor -import edu.uci.ics.texera.workflow.common.storage.OpResultStorage -import edu.uci.ics.texera.workflow.common.workflow.{LogicalPlan, WorkflowRewriter, WorkflowVertex} -import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc -import edu.uci.ics.texera.workflow.operators.source.cache.CacheSourceOpDesc - -import scala.collection.mutable - -object WorkflowCacheService extends LazyLogging { - def isAvailable: Boolean = AmberUtils.amberConfig.getBoolean("cache.enabled") -} - -class WorkflowCacheService( - opResultStorage: OpResultStorage, - stateStore: WorkflowStateStore, - wsInput: WebsocketInput -) extends SubscriptionManager - with LazyLogging { - - var lastLogicalPlan: LogicalPlan = null - -// val cachedOperators: mutable.HashMap[String, OperatorDescriptor] = -// mutable.HashMap[String, OperatorDescriptor]() -// val cacheSourceOperators: mutable.HashMap[String, CacheSourceOpDesc] = -// mutable.HashMap[String, CacheSourceOpDesc]() -// val cacheSinkOperators: mutable.HashMap[String, ProgressiveSinkOpDesc] = -// mutable.HashMap[String, ProgressiveSinkOpDesc]() -// val operatorRecord: mutable.HashMap[String, WorkflowVertex] = -// mutable.HashMap[String, WorkflowVertex]() - -// addSubscription( -// stateStore.cacheStore.registerDiffHandler((oldState, newState) => { -// Iterable(CacheStatusUpdateEvent(newState.operatorInfo.map { -// case (k, v) => (k, if (v.isInvalid) "cache invalid" else "cache valid") -// })) -// }) -// ) - - - - addSubscription(wsInput.subscribe((req: CacheStatusUpdateRequest, uidOpt) => { -// updateCacheStatus(req) - })) - -// def updateCacheStatus(request: CacheStatusUpdateRequest): Unit = { -// val logicalPlan = -// LogicalPlan(request.operators, request.links, request.breakpoints, request.cachedOperatorIds) -// logicalPlan.cachedOperatorIds = request.cachedOperatorIds -// logger.debug(s"Cached operators: $cachedOperators with ${request.cachedOperatorIds}") -// val workflowRewriter = new WorkflowRewriter( -// logicalPlan, -// cachedOperators.clone(), -// cacheSourceOperators.clone(), -// cacheSinkOperators.clone(), -// operatorRecord.clone(), -// 
opResultStorage -// ) -// -// val invalidSet = workflowRewriter.cacheStatusUpdate() -// stateStore.cacheStore.updateState { oldState => -// oldState.withOperatorInfo( -// request.cachedOperatorIds -// .filter(cachedOperators.contains) -// .map(id => { -// if (cachedOperators.contains(id)) { -// if (!invalidSet.contains(id)) { -// (id, VALID) -// } else { -// (id, INVALID) -// } -// } else { -// (id, INVALID) -// } -// }) -// .toMap -// ) -// } -// } -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala index 2afcf79d0ca..c816a9374c5 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala @@ -25,7 +25,8 @@ class WorkflowJobService( resultService: JobResultService, request: WorkflowExecuteRequest, errorHandler: Throwable => Unit, - engineVersion: String + engineVersion: String, + lastCompletedLogicalPlan: LogicalPlan, ) extends SubscriptionManager with LazyLogging { @@ -33,12 +34,13 @@ class WorkflowJobService( val workflowCompiler: WorkflowCompiler = createWorkflowCompiler(LogicalPlan(request.logicalPlan)) val workflow: Workflow = workflowCompiler.amberWorkflow( WorkflowIdentity(workflowContext.jobId), - resultService.opResultStorage + resultService.opResultStorage, + lastCompletedLogicalPlan ) private val controllerConfig = { val conf = ControllerConfig.default if ( - workflowCompiler.finalLogicalPlan.operators.exists { + workflowCompiler.logicalPlan.operators.exists { case x: DualInputPortsPythonUDFOpDescV2 => true case x: PythonUDFOpDescV2 => true case x: PythonUDFSourceOpDescV2 => true @@ -75,13 +77,13 @@ class WorkflowJobService( workflowContext.executionID = -1 // for every new execution, // reset it so that the value doesn't carry over across executions def startWorkflow(): Unit = { - for (pair <- workflowCompiler.finalLogicalPlan.breakpoints) { + for (pair <- workflowCompiler.logicalPlan.breakpoints) { Await.result( jobBreakpointService.addBreakpoint(pair.operatorID, pair.breakpoint), Duration.fromSeconds(10) ) } - resultService.attachToJob(stateStore, workflowCompiler.finalLogicalPlan, client) + resultService.attachToJob(stateStore, workflowCompiler.logicalPlan, client) if (WorkflowService.userSystemEnabled) { workflowContext.executionID = ExecutionsMetadataPersistService.insertNewExecution( workflowContext.wId, diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowService.scala index be227fbda9d..31e36a6e950 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowService.scala @@ -3,18 +3,18 @@ package edu.uci.ics.texera.web.service import java.util.concurrent.ConcurrentHashMap import com.typesafe.scalalogging.LazyLogging import edu.uci.ics.amber.engine.common.AmberUtils + import scala.collection.JavaConverters._ import edu.uci.ics.texera.web.model.websocket.event.{TexeraWebSocketEvent, WorkflowErrorEvent} import edu.uci.ics.texera.web.{SubscriptionManager, WebsocketInput, WorkflowLifecycleManager} -import edu.uci.ics.texera.web.model.websocket.request.{ - WorkflowExecuteRequest, - WorkflowKillRequest -} +import edu.uci.ics.texera.web.model.websocket.request.{WorkflowExecuteRequest, WorkflowKillRequest} import 
edu.uci.ics.texera.web.resource.WorkflowWebsocketResource import edu.uci.ics.texera.web.service.WorkflowService.mkWorkflowStateId import edu.uci.ics.texera.web.storage.WorkflowStateStore +import edu.uci.ics.texera.web.workflowruntimestate.WorkflowAggregatedState.COMPLETED import edu.uci.ics.texera.workflow.common.WorkflowContext import edu.uci.ics.texera.workflow.common.storage.OpResultStorage +import edu.uci.ics.texera.workflow.common.workflow.LogicalPlan import io.reactivex.rxjava3.disposables.{CompositeDisposable, Disposable} import io.reactivex.rxjava3.subjects.BehaviorSubject import org.jooq.types.UInteger @@ -68,13 +68,13 @@ class WorkflowService( } val wsInput = new WebsocketInput(errorHandler) val stateStore = new WorkflowStateStore() + var jobService: BehaviorSubject[WorkflowJobService] = BehaviorSubject.create() + val resultService: JobResultService = new JobResultService(opResultStorage, stateStore) val exportService: ResultExportService = new ResultExportService(opResultStorage, UInteger.valueOf(wId)) - val operatorCache: WorkflowCacheService = - new WorkflowCacheService(opResultStorage, stateStore, wsInput) - var jobService: BehaviorSubject[WorkflowJobService] = BehaviorSubject.create() + val lifeCycleManager: WorkflowLifecycleManager = new WorkflowLifecycleManager( s"wid=$wId", cleanUpTimeout, @@ -86,6 +86,17 @@ class WorkflowService( } ) + var lastCompletedLogicalPlan: LogicalPlan = null + + jobService.subscribe { job: WorkflowJobService => { + job.stateStore.jobMetadataStore.registerDiffHandler { (oldState, newState) => { + if (oldState.state != COMPLETED && newState.state == COMPLETED) { + lastCompletedLogicalPlan = job.workflowCompiler.logicalPlan + } + Iterable.empty + }} + }} + addSubscription( wsInput.subscribe((evt: WorkflowExecuteRequest, uidOpt) => initJobService(evt, uidOpt)) ) @@ -141,7 +152,8 @@ class WorkflowService( resultService, req, errorHandler, - convertToJson(req.engineVersion) + convertToJson(req.engineVersion), + lastCompletedLogicalPlan ) lifeCycleManager.registerCleanUpOnStateChange(job.stateStore) jobService.onNext(job) @@ -158,7 +170,6 @@ class WorkflowService( override def unsubscribeAll(): Unit = { super.unsubscribeAll() Option(jobService.getValue).foreach(_.unsubscribeAll()) - operatorCache.unsubscribeAll() resultService.unsubscribeAll() } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala index 47b2f198e23..cda45a23ce7 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala @@ -180,7 +180,7 @@ abstract class OperatorDescriptor extends Serializable { override def hashCode: Int = HashCodeBuilder.reflectionHashCode(this) - override def equals(that: Any): Boolean = EqualsBuilder.reflectionEquals(this, that) + override def equals(that: Any): Boolean = EqualsBuilder.reflectionEquals(this, that, "context") override def toString: String = ToStringBuilder.reflectionToString(this) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala index b37d4e16438..2949712a99a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala +++ 
b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala @@ -39,15 +39,16 @@ object LogicalPlan { workflowDag } - def apply(pojo: LogicalPlanPojo): LogicalPlan = - LogicalPlan(pojo.operators, pojo.links, pojo.breakpoints, List()).normalize() + def apply(pojo: LogicalPlanPojo): LogicalPlan = { + SinkInjectionTransformer.transform(pojo) + } } case class LogicalPlan( operators: List[OperatorDescriptor], links: List[OperatorLink], breakpoints: List[BreakpointInfo], - var cachedOperatorIds: List[String] = List() + opsToReuseCache: List[String] = List() ) { lazy val operatorMap: Map[String, OperatorDescriptor] = @@ -101,15 +102,15 @@ case class LogicalPlan( } def addOperator(operatorDescriptor: OperatorDescriptor): LogicalPlan = { - this.copy(operators :+ operatorDescriptor, links, breakpoints, cachedOperatorIds) + this.copy(operators :+ operatorDescriptor, links, breakpoints, opsToReuseCache) } def removeOperator(operatorId: String): LogicalPlan = { this.copy( - operators.filter(o => o.operatorID == operatorId), - links.filter(l => l.origin.operatorID == operatorId || l.destination.operatorID == operatorId), - breakpoints.filter(b => b.operatorID == operatorId), - cachedOperatorIds.filter(c => c == operatorId) + operators.filter(o => o.operatorID != operatorId), + links.filter(l => l.origin.operatorID != operatorId && l.destination.operatorID != operatorId), + breakpoints.filter(b => b.operatorID != operatorId), + opsToReuseCache.filter(c => c != operatorId) ) } @@ -122,7 +123,7 @@ case class LogicalPlan( ): LogicalPlan = { val newLink = OperatorLink(OperatorPort(from, fromPort), OperatorPort(to, toPort)) val newLinks = links :+ newLink - this.copy(operators, newLinks, breakpoints, cachedOperatorIds) + this.copy(operators, newLinks, breakpoints, opsToReuseCache) } // returns a new physical plan with the edges removed @@ -134,14 +135,14 @@ case class LogicalPlan( ): LogicalPlan = { val linkToRemove = OperatorLink(OperatorPort(from, fromPort), OperatorPort(to, toPort)) val newLinks = links.filter(l => l != linkToRemove) - this.copy(operators, newLinks, breakpoints, cachedOperatorIds) + this.copy(operators, newLinks, breakpoints, opsToReuseCache) } def removeEdge( edge: OperatorLink ): LogicalPlan = { val newLinks = links.filter(l => l != edge) - this.copy(operators, newLinks, breakpoints, cachedOperatorIds) + this.copy(operators, newLinks, breakpoints, opsToReuseCache) } def getDownstream(operatorID: String): List[OperatorDescriptor] = { @@ -250,25 +251,6 @@ case class LogicalPlan( throw new RuntimeException(s"${errorList.size} error(s) occurred in schema propagation.") } - // assign storage to texera-managed sinks before generating exec config - operators.foreach { - case o @ (sink: ProgressiveSinkOpDesc) => - val storageKey = sink.getUpstreamId.getOrElse(o.operatorID) - // due to the size limit of single document in mongoDB (16MB) - // for sinks visualizing HTMLs which could possibly be large in size, we always use the memory storage. 
- val storageType = - if (sink.getChartType.contains(VisualizationConstants.HTML_VIZ)) OpResultStorage.MEMORY - else OpResultStorage.defaultStorageMode - sink.setStorage( - opResultStorage.create( - storageKey, - outputSchemaMap(o.operatorIdentifier).head, - storageType - ) - ) - case _ => - } - var physicalPlan = PhysicalPlan(List(), List()) operators.foreach(o => { @@ -323,8 +305,8 @@ case class LogicalPlan( physicalPlan } - def normalize(): LogicalPlan = { - SinkInjectionTransformer.transform(this) - } +// def normalize(): LogicalPlan = { +// SinkInjectionTransformer.transform(this) +// } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala index e5effeca57c..5b87bfaebb0 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala @@ -1,21 +1,21 @@ package edu.uci.ics.texera.workflow.common.workflow -import edu.uci.ics.texera.workflow.common.WorkflowContext +import edu.uci.ics.texera.web.model.websocket.request.LogicalPlanPojo import edu.uci.ics.texera.workflow.operators.sink.SinkOpDesc import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc import edu.uci.ics.texera.workflow.operators.visualization.VisualizationOperator object SinkInjectionTransformer { - def transform(logicalPlan: LogicalPlan): LogicalPlan = { - var resultPlan = logicalPlan + def transform(pojo: LogicalPlanPojo): LogicalPlan = { + var logicalPlan = LogicalPlan(pojo.operators, pojo.links, pojo.breakpoints, pojo.opsToReuseResult) // for any terminal operator without a sink, add a sink val nonSinkTerminalOps = logicalPlan.getTerminalOperators.filter(opId => ! logicalPlan.getOperator(opId).isInstanceOf[SinkOpDesc] ) - // for any operators marked as cache (view result) without a sink, add a sink - val viewResultOps = logicalPlan.cachedOperatorIds.filter(opId => + // for any operators marked as view result without a sink, add a sink + val viewResultOps = pojo.opsToViewResult.filter(opId => ! 
logicalPlan.getDownstream(opId).exists(op => op.isInstanceOf[SinkOpDesc]) ) @@ -24,30 +24,27 @@ object SinkInjectionTransformer { val op = logicalPlan.getOperator(opId) op.operatorInfo.outputPorts.indices.foreach(outPort => { val sink = new ProgressiveSinkOpDesc() - resultPlan = resultPlan + logicalPlan = logicalPlan .addOperator(sink) .addEdge(op.operatorID, sink.operatorID, outPort) }) }) // check precondition: all the terminal operators should sinks now - assert(resultPlan.getTerminalOperators.forall(o => resultPlan.getOperator(o).isInstanceOf[SinkOpDesc])) - - var finalCachedOpIds = Set[String]() + assert(logicalPlan.getTerminalOperators.forall(o => logicalPlan.getOperator(o).isInstanceOf[SinkOpDesc])) // for each sink: // set the corresponding upstream ID and port // set output mode based on the visualization operator before it - resultPlan.getTerminalOperators.foreach(sinkOpId => { - val sinkOp = resultPlan.getOperator(sinkOpId).asInstanceOf[ProgressiveSinkOpDesc] - val upstream = resultPlan.getUpstream(sinkOpId).headOption - val edge = resultPlan.links.find(l => + logicalPlan.getTerminalOperators.foreach(sinkOpId => { + val sinkOp = logicalPlan.getOperator(sinkOpId).asInstanceOf[ProgressiveSinkOpDesc] + val upstream = logicalPlan.getUpstream(sinkOpId).headOption + val edge = logicalPlan.links.find(l => l.origin.operatorID == upstream.map(_.operatorID).orNull && l.destination.operatorID == sinkOpId ) assert(upstream.nonEmpty) if (upstream.nonEmpty && edge.nonEmpty) { - finalCachedOpIds += upstream.get // set upstream ID and port sinkOp.setUpstreamId(upstream.get.operatorID) sinkOp.setUpstreamPort(edge.get.origin.portOrdinal) @@ -63,7 +60,7 @@ object SinkInjectionTransformer { } }) - resultPlan.copy(cachedOperatorIds = finalCachedOpIds.toList) + logicalPlan } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCacheRewriter.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCacheRewriter.scala index a325e343496..cc7513644ef 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCacheRewriter.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCacheRewriter.scala @@ -9,17 +9,28 @@ object WorkflowCacheRewriter { def transform( logicalPlan: LogicalPlan, storage: OpResultStorage, - availableCache: Map[String, String], // key: operator ID in workflow, value: cache key in storage - operatorsToUseCache: Set[String], // user-specified operators to reuse cache if possible + availableCache: Set[String], ): LogicalPlan = { var resultPlan = logicalPlan - operatorsToUseCache.intersect(availableCache.keySet).foreach(opId => { - val cacheId = availableCache(opId) - val materializationReader = new CacheSourceOpDesc(cacheId, storage) + // an operator can reuse cache if + // 1: the user wants the operator to reuse past result + // 2: the operator is equivalent to the last run + val opsToUseCache = logicalPlan.opsToReuseCache.toSet.intersect(availableCache) + + // remove sinks directly connected to operators that are already reusing cache + val unnecessarySinks = resultPlan.getTerminalOperators.filter(sink => { + opsToUseCache.contains(resultPlan.getUpstream(sink).head.operatorID) + }) + unnecessarySinks.foreach(o => { + resultPlan = resultPlan.removeOperator(o) + }) + + opsToUseCache.foreach(opId => { + val materializationReader = new CacheSourceOpDesc(opId, storage) resultPlan = resultPlan.addOperator(materializationReader) // replace the connection of 
all outgoing edges of opId with the cache - val edgesToReplace = resultPlan.getUpstreamEdges(opId) + val edgesToReplace = resultPlan.getDownstreamEdges(opId) edgesToReplace.foreach(e => { resultPlan = resultPlan.removeEdge(e.origin.operatorID, e.destination.operatorID, e.origin.portOrdinal, e.destination.portOrdinal) @@ -28,22 +39,16 @@ object WorkflowCacheRewriter { }) }) - // remove sinks directly connected to operators that are already cached - val unnecessarySinks = resultPlan.getTerminalOperators.filter(sink => { - availableCache.contains(resultPlan.getUpstream(sink).head.operatorID) - }) - unnecessarySinks.foreach(o => { - resultPlan = resultPlan.removeOperator(o) - }) - // operators that are no longer reachable by any sink don't need to run val allOperators = resultPlan.operators.map(op => op.operatorID).toSet - assert(allOperators.forall(o => resultPlan.getOperator(o).isInstanceOf[SinkOpDesc])) - val usefulOperators = resultPlan.terminalOperators.flatMap(o => resultPlan.getAncestorOpIds(o)).toSet - allOperators.diff(usefulOperators).foreach(o => { + val sinkOps = resultPlan.operators.filter(op => op.isInstanceOf[SinkOpDesc]).map(o => o.operatorID) + val usefulOperators = sinkOps ++ sinkOps.flatMap(o => resultPlan.getAncestorOpIds(o)).toSet + allOperators.diff(usefulOperators.toSet).foreach(o => { resultPlan = resultPlan.removeOperator(o) }) + assert(resultPlan.terminalOperators.forall(o => resultPlan.getOperator(o).isInstanceOf[SinkOpDesc])) + resultPlan } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala index 6dd5829987e..9d0f0b30ec6 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala @@ -3,9 +3,12 @@ package edu.uci.ics.texera.workflow.common.workflow import edu.uci.ics.amber.engine.architecture.controller.Workflow import edu.uci.ics.amber.engine.architecture.scheduling.WorkflowPipelinedRegionsBuilder import edu.uci.ics.amber.engine.common.virtualidentity.WorkflowIdentity +import edu.uci.ics.texera.web.service.WorkflowCacheChecker import edu.uci.ics.texera.workflow.common.operators.OperatorDescriptor import edu.uci.ics.texera.workflow.common.storage.OpResultStorage import edu.uci.ics.texera.workflow.common.{ConstraintViolation, WorkflowContext} +import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc +import edu.uci.ics.texera.workflow.operators.visualization.VisualizationConstants object WorkflowCompiler { @@ -23,8 +26,6 @@ object WorkflowCompiler { class WorkflowCompiler(val logicalPlan: LogicalPlan, val context: WorkflowContext) { logicalPlan.operatorMap.values.foreach(initOperator) - lazy val finalLogicalPlan = transformLogicalPlan(logicalPlan) - def initOperator(operator: OperatorDescriptor): Unit = { operator.setContext(context) } @@ -34,14 +35,50 @@ class WorkflowCompiler(val logicalPlan: LogicalPlan, val context: WorkflowContex .map(o => (o._1, o._2.validate().toSet)) .filter(o => o._2.nonEmpty) - def transformLogicalPlan(originalPlan: LogicalPlan): LogicalPlan = { - // logical plan transformation: add a sink operator for terminal operators without a sink - SinkInjectionTransformer.transform(originalPlan, context) + private def assignSinkStorage( + logicalPlan: LogicalPlan, storage: OpResultStorage, + reuseStorageSet: Set[String] = Set() + ) = { + 
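+      // Editor's note (illustrative, not part of the original patch): each sink's result store is
+      // keyed by its upstream operator ID, falling back to the sink's own ID. If that key appears
+      // in reuseStorageSet and already exists in OpResultStorage, the existing store is reused so
+      // the previous run's results stay readable; otherwise a fresh store is created using the
+      // schema obtained from schema propagation.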
// assign storage to texera-managed sinks before generating exec config + logicalPlan.operators.foreach { + case o @ (sink: ProgressiveSinkOpDesc) => + val storageKey = sink.getUpstreamId.getOrElse(o.operatorID) + // due to the size limit of single document in mongoDB (16MB) + // for sinks visualizing HTMLs which could possibly be large in size, we always use the memory storage. + val storageType = { + if (sink.getChartType.contains(VisualizationConstants.HTML_VIZ)) OpResultStorage.MEMORY + else OpResultStorage.defaultStorageMode + } + if (reuseStorageSet.contains(storageKey) && storage.contains(storageKey)) { + sink.setStorage(storage.get(storageKey)) + } else { + sink.setStorage( + storage.create( + storageKey, + logicalPlan.outputSchemaMap(o.operatorIdentifier).head, + storageType + ) + ) + } + case _ => + } } - def amberWorkflow(workflowId: WorkflowIdentity, opResultStorage: OpResultStorage): Workflow = { - val physicalPlan0 = finalLogicalPlan.toPhysicalPlan(this.context, opResultStorage) + def amberWorkflow( + workflowId: WorkflowIdentity, + opResultStorage: OpResultStorage, + lastCompletedJob: LogicalPlan = null + ): Workflow = { + val cacheReuses = new WorkflowCacheChecker(lastCompletedJob, logicalPlan).getValidCacheReuse() + val opsToReuseCache = cacheReuses.intersect(logicalPlan.opsToReuseCache.toSet) + val rewrittenLogicalPlan = WorkflowCacheRewriter.transform(logicalPlan, opResultStorage, opsToReuseCache) + rewrittenLogicalPlan.operatorMap.values.foreach(initOperator) + + assignSinkStorage(logicalPlan, opResultStorage, opsToReuseCache) + assignSinkStorage(rewrittenLogicalPlan, opResultStorage, opsToReuseCache) + + val physicalPlan0 = rewrittenLogicalPlan.toPhysicalPlan(this.context, opResultStorage) // create pipelined regions. val physicalPlan1 = new WorkflowPipelinedRegionsBuilder( diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriter.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriter.scala index 0f305284d31..b56d4ad9123 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriter.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriter.scala @@ -216,7 +216,7 @@ class WorkflowRewriter( operatorRecord += ((opId, getWorkflowVertex(workflowDAG.getOperator(opId)))) logger.info("Vertex {} is not recorded.", operatorRecord(opId)) true - } else if (logicalPlan.cachedOperatorIds.contains(opId)) { + } else if (logicalPlan.opsToReuseCache.contains(opId)) { !operatorRecord(opId).equals(getWorkflowVertex(workflowDAG.getOperator(opId))) } else { val vertex = getWorkflowVertex(workflowDAG.getOperator(opId)) @@ -225,7 +225,7 @@ class WorkflowRewriter( logger.info("Vertex {} is updated.", operatorRecord(opId)) true } else if (cachedOperatorDescriptors.contains(opId)) { - !logicalPlan.cachedOperatorIds.contains(opId) + !logicalPlan.opsToReuseCache.contains(opId) } else { logger.info("Operator: {} is not updated.", operatorRecord(opId)) false @@ -297,7 +297,7 @@ class WorkflowRewriter( operatorRecord += ((opId, getWorkflowVertex(workflowDAG.getOperator(opId)))) logger.info("Vertex {} is not recorded.", operatorRecord(opId)) true - } else if (logicalPlan.cachedOperatorIds.contains(opId)) { + } else if (logicalPlan.opsToReuseCache.contains(opId)) { if (cachedOperatorDescriptors.contains(opId)) { val vertex = getWorkflowVertex(workflowDAG.getOperator(opId)) if (operatorRecord(opId).equals(vertex)) { @@ -316,7 +316,7 @@ class 
WorkflowRewriter( logger.info("Vertex {} is updated.", operatorRecord(opId)) true } else if (cachedOperatorDescriptors.contains(opId)) { - !logicalPlan.cachedOperatorIds.contains(opId) + !logicalPlan.opsToReuseCache.contains(opId) } else { logger.info("Operator: {} is not updated.", operatorRecord(opId)) false @@ -344,7 +344,7 @@ class WorkflowRewriter( } private def isCacheEnabled(desc: OperatorDescriptor): Boolean = { - if (!logicalPlan.cachedOperatorIds.contains(desc.operatorID)) { + if (!logicalPlan.opsToReuseCache.contains(desc.operatorID)) { cachedOperatorDescriptors.remove(desc.operatorID) logger.info("Operator {} cache not enabled.", desc) return false diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/cache/CacheSourceOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/cache/CacheSourceOpDesc.scala index 9f6273754a7..202523b6d79 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/cache/CacheSourceOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/cache/CacheSourceOpDesc.scala @@ -19,7 +19,7 @@ class CacheSourceOpDesc(val targetSinkStorageId: String, opResultStorage: OpResu assert(null != targetSinkStorageId) assert(null != opResultStorage) - var schema: Schema = _ + var schema: Schema = opResultStorage.get(targetSinkStorageId).getSchema override def sourceSchema(): Schema = schema diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/scan/ScanSourceOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/scan/ScanSourceOpDesc.scala index 9648b1df768..b2dc782fbaf 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/scan/ScanSourceOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/scan/ScanSourceOpDesc.scala @@ -5,13 +5,10 @@ import com.fasterxml.jackson.databind.annotation.JsonDeserialize import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle import edu.uci.ics.texera.web.resource.dashboard.user.file.UserFileAccessResource import edu.uci.ics.texera.workflow.common.WorkflowContext -import edu.uci.ics.texera.workflow.common.metadata.{ - OperatorGroupConstants, - OperatorInfo, - OutputPort -} +import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo, OutputPort} import edu.uci.ics.texera.workflow.common.operators.source.SourceOperatorDescriptor import edu.uci.ics.texera.workflow.common.tuple.schema.Schema +import org.apache.commons.lang3.builder.EqualsBuilder import java.util.Collections.singletonList import scala.collection.JavaConverters.asScalaBuffer @@ -96,4 +93,7 @@ abstract class ScanSourceOpDesc extends SourceOperatorDescriptor { } def inferSchema(): Schema + + override def equals(that: Any): Boolean = EqualsBuilder.reflectionEquals(this, that, + "context", "filePath") } diff --git a/core/amber/src/test/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriterSpec.scala b/core/amber/src/test/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriterSpec.scala index c54eafe75d7..9c7862ceabe 100644 --- a/core/amber/src/test/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriterSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriterSpec.scala @@ -48,7 +48,6 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val breakpoints = List[BreakpointInfo]() val logicalPlan = 
LogicalPlan(operators, links, breakpoints) - logicalPlan.cachedOperatorIds = List[String]() rewriter = new WorkflowRewriter( logicalPlan, mutable.HashMap[String, OperatorDescriptor](), @@ -84,8 +83,7 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val breakpointInfo = BreakpointInfo(sourceOperator.operatorID, CountBreakpoint(0)) breakpoints += breakpointInfo - val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList) - logicalPlan.cachedOperatorIds = List(sourceOperator.operatorID) + val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList, List(sourceOperator.operatorID)) val tuples = mutable.MutableList[Tuple]() val cacheSourceOperator = new CacheSourceOpDesc(uuid, opResultStorage) @@ -142,8 +140,7 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val destination = OperatorPort(sinkOperator.operatorID, 0) links += OperatorLink(origin, destination) - val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList) - logicalPlan.cachedOperatorIds = List(sourceOperator.operatorID) + val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList, List(sourceOperator.operatorID)) val cachedOperators = mutable.HashMap[String, OperatorDescriptor]() val cacheSourceOperators = mutable.HashMap[String, CacheSourceOpDesc]() @@ -199,8 +196,7 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val breakpointInfo = BreakpointInfo(sourceOperator.operatorID, CountBreakpoint(0)) breakpoints += breakpointInfo - val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList) - logicalPlan.cachedOperatorIds = List(sourceOperator.operatorID) + val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList, List(sourceOperator.operatorID)) val cachedOperators = mutable.HashMap[String, OperatorDescriptor]() val cacheSourceOperators = mutable.HashMap[String, CacheSourceOpDesc]() @@ -250,7 +246,6 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val destination2 = OperatorPort(sinkOperator.operatorID, 0) links += OperatorLink(origin2, destination2) - val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList) val tuples = mutable.MutableList[Tuple]() val uuid = UUID.randomUUID().toString @@ -261,8 +256,8 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val cachedOperatorID = filterOperator.operatorID - logicalPlan.cachedOperatorIds = List(cachedOperatorID) operatorOutputCache += ((cachedOperatorID, tuples)) + val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList, List(cachedOperatorID)) val cachedOperators = mutable.HashMap[String, OperatorDescriptor]() cachedOperators += ((cachedOperatorID, operatorToString(filterOperator))) @@ -332,7 +327,6 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val breakpointInfo = BreakpointInfo(sourceOperator.operatorID, CountBreakpoint(0)) breakpoints += breakpointInfo - val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList) val tuples = mutable.MutableList[Tuple]() val uuid = UUID.randomUUID().toString @@ -342,7 +336,7 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val cachedOperatorID = filterOperator.operatorID - logicalPlan.cachedOperatorIds = List(cachedOperatorID) + val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList, List(cachedOperatorID)) operatorOutputCache += ((cachedOperatorID, tuples)) val 
cachedOperators = mutable.HashMap[String, OperatorDescriptor]() @@ -415,7 +409,6 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { links += create00Link(filterOperator2, filterOperator3) links += create00Link(filterOperator3, sinkOperator) - val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList) val uuidForFilter3 = UUID.randomUUID().toString val cacheSourceForFilter3 = new CacheSourceOpDesc(uuidForFilter3, opResultStorage) @@ -430,8 +423,9 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val cacheSinkOperators = mutable.HashMap[String, ProgressiveSinkOpDesc]() cacheSinkOperators += ((cachedOperatorIDForFilter3, cacheSinkForFilter3)) - logicalPlan.cachedOperatorIds = - List[String](cachedOperatorIDForFilter3, filterOperator.operatorID) + val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList, + List(cachedOperatorIDForFilter3, filterOperator.operatorID)) + rewriter = new WorkflowRewriter( logicalPlan, @@ -501,7 +495,6 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { links += create00Link(filterOperator2, filterOperator3) links += create00Link(filterOperator3, sinkOperator) - val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList) val uuidForFilter = UUID.randomUUID().toString val cacheSourceForFilter = new CacheSourceOpDesc(uuidForFilter, opResultStorage) @@ -516,8 +509,8 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val cacheSinkOperators = mutable.HashMap[String, ProgressiveSinkOpDesc]() cacheSinkOperators += ((cachedOperatorIDForFilter, cacheSinkForFilter)) - logicalPlan.cachedOperatorIds = - List[String](cachedOperatorIDForFilter, filterOperator3.operatorID) + val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList, + List(cachedOperatorIDForFilter, filterOperator3.operatorID)) rewriter = new WorkflowRewriter( logicalPlan, From 5cc37aa315f180c57aea01c3b9b5d58e0586ef85 Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Fri, 7 Jul 2023 04:15:34 -0700 Subject: [PATCH 08/18] complete cache --- .../request/CacheStatusUpdateRequest.scala | 6 +- .../request/WorkflowExecuteRequest.scala | 2 +- .../resource/WorkflowWebsocketResource.scala | 12 ++- .../texera/web/service/JobResultService.scala | 41 ++++++--- .../web/service/WorkflowCacheChecker.scala | 81 ++++++++-------- .../web/service/WorkflowJobService.scala | 2 +- .../texera/web/service/WorkflowService.scala | 18 ++-- .../web/storage/WorkflowResultStore.scala | 4 +- .../common/workflow/LogicalPlan.scala | 6 +- .../workflow/SinkInjectionTransformer.scala | 13 ++- .../workflow/WorkflowCacheRewriter.scala | 37 +++++--- .../common/workflow/WorkflowCompiler.scala | 13 +-- .../source/scan/ScanSourceOpDesc.scala | 10 +- .../workflow/WorkflowRewriterSpec.scala | 48 +++++++--- .../navigation/navigation.component.html | 34 +++++-- .../context-menu/context-menu.component.html | 30 +++++- .../workflow-editor.component.ts | 15 +++ .../component/workspace.component.ts | 4 +- .../execute-workflow.service.ts | 8 +- .../execute-workflow/mock-workflow-plan.ts | 2 - .../service/joint-ui/joint-ui.service.ts | 92 ++++++++++++------- .../operator-menu/operator-menu.service.ts | 43 ++++++++- .../model/shared-model-change-handler.ts | 33 +++++-- .../model/workflow-action.service.ts | 19 +++- .../workflow-graph/model/workflow-graph.ts | 85 +++++++++++++++-- .../operator-cache-status.service.spec.ts | 6 +- .../operator-cache-status.service.ts | 26 +++--- 
.../types/execute-workflow.interface.ts | 3 +- .../types/workflow-common.interface.ts | 3 +- .../types/workflow-websocket.interface.ts | 6 +- .../svg/operator-reuse-cache-invalid.svg | 3 + .../assets/svg/operator-reuse-cache-valid.svg | 3 + .../src/assets/svg/operator-view-result.svg | 4 + 33 files changed, 510 insertions(+), 202 deletions(-) create mode 100644 core/new-gui/src/assets/svg/operator-reuse-cache-invalid.svg create mode 100644 core/new-gui/src/assets/svg/operator-reuse-cache-valid.svg create mode 100644 core/new-gui/src/assets/svg/operator-view-result.svg diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/CacheStatusUpdateRequest.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/CacheStatusUpdateRequest.scala index f879002c0c4..fc10f4fbfdc 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/CacheStatusUpdateRequest.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/CacheStatusUpdateRequest.scala @@ -7,13 +7,11 @@ case class CacheStatusUpdateRequest( operators: List[OperatorDescriptor], links: List[OperatorLink], breakpoints: List[BreakpointInfo], - opsToViewResult: List[String], - opsToReuseResult: List[String] + opsToViewResult: List[String], + opsToReuseResult: List[String] ) extends TexeraWebSocketRequest { def toLogicalPlanPojo() = { LogicalPlanPojo(operators, links, breakpoints, opsToViewResult, opsToReuseResult) } } - - diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/WorkflowExecuteRequest.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/WorkflowExecuteRequest.scala index f994f492e67..fa974e6b74e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/WorkflowExecuteRequest.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/WorkflowExecuteRequest.scala @@ -13,6 +13,6 @@ case class LogicalPlanPojo( operators: List[OperatorDescriptor], links: List[OperatorLink], breakpoints: List[BreakpointInfo], - opsToViewResult: List[String], + opsToViewResult: List[String], opsToReuseResult: List[String] ) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/WorkflowWebsocketResource.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/WorkflowWebsocketResource.scala index 48172a52ed2..93d3d31c999 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/WorkflowWebsocketResource.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/WorkflowWebsocketResource.scala @@ -5,7 +5,12 @@ import com.typesafe.scalalogging.LazyLogging import edu.uci.ics.texera.Utils import edu.uci.ics.texera.web.{ServletAwareConfigurator, SessionState} import edu.uci.ics.texera.web.model.jooq.generated.tables.pojos.User -import edu.uci.ics.texera.web.model.websocket.event.{CacheStatusUpdateEvent, TexeraWebSocketEvent, WorkflowErrorEvent, WorkflowStateEvent} +import edu.uci.ics.texera.web.model.websocket.event.{ + CacheStatusUpdateEvent, + TexeraWebSocketEvent, + WorkflowErrorEvent, + WorkflowStateEvent +} import edu.uci.ics.texera.web.model.websocket.request._ import edu.uci.ics.texera.web.model.websocket.response._ import edu.uci.ics.texera.web.service.{WorkflowCacheChecker, WorkflowService} @@ -82,8 +87,9 @@ class WorkflowWebsocketResource extends LazyLogging { if (oldPlan != null) { val newPlan = LogicalPlan.apply(cacheStatusUpdateRequest.toLogicalPlanPojo()) val validCacheOps = new 
WorkflowCacheChecker(oldPlan, newPlan).getValidCacheReuse() - val cacheUpdateResult = cacheStatusUpdateRequest.opsToReuseResult.map(o => - (o, if (validCacheOps.contains(o)) "cache valid" else "cache invalid")).toMap + val cacheUpdateResult = cacheStatusUpdateRequest.opsToReuseResult + .map(o => (o, if (validCacheOps.contains(o)) "cache valid" else "cache invalid")) + .toMap send(session, CacheStatusUpdateEvent(cacheUpdateResult)) } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala index 4475876f515..3f32d5d0751 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala @@ -8,10 +8,19 @@ import edu.uci.ics.amber.engine.architecture.controller.promisehandlers.FatalErr import edu.uci.ics.amber.engine.common.AmberUtils import edu.uci.ics.amber.engine.common.client.AmberClient import edu.uci.ics.amber.engine.common.tuple.ITuple -import edu.uci.ics.texera.web.model.websocket.event.{PaginatedResultEvent, TexeraWebSocketEvent, WebResultUpdateEvent} +import edu.uci.ics.texera.web.model.websocket.event.{ + PaginatedResultEvent, + TexeraWebSocketEvent, + WebResultUpdateEvent +} import edu.uci.ics.texera.web.model.websocket.request.ResultPaginationRequest import edu.uci.ics.texera.web.service.WebResultUpdate.{WebResultUpdate, convertWebResultUpdate} -import edu.uci.ics.texera.web.storage.{JobStateStore, OperatorResultMetadata, WorkflowResultStore, WorkflowStateStore} +import edu.uci.ics.texera.web.storage.{ + JobStateStore, + OperatorResultMetadata, + WorkflowResultStore, + WorkflowStateStore +} import edu.uci.ics.texera.web.workflowruntimestate.JobMetadataStore import edu.uci.ics.texera.web.workflowruntimestate.WorkflowAggregatedState.RUNNING import edu.uci.ics.texera.web.{SubscriptionManager, TexeraWebApplication} @@ -29,9 +38,9 @@ import scala.concurrent.duration.DurationInt object WebResultUpdate { // convert Tuple from engine's format to JSON format def webDataFromTuple( - mode: WebOutputMode, - table: List[ITuple], - chartType: Option[String] + mode: WebOutputMode, + table: List[ITuple], + chartType: Option[String] ): WebDataUpdate = { val tableInJson = table.map(t => t.asInstanceOf[Tuple].asKeyValuePairJson()) WebDataUpdate(mode, tableInJson, chartType) @@ -45,9 +54,9 @@ object WebResultUpdate { * @return list of indices of modified pages, index starts from 1 */ def calculateDirtyPageIndices( - beforeSnapshot: List[ITuple], - afterSnapshot: List[ITuple], - pageSize: Int + beforeSnapshot: List[ITuple], + afterSnapshot: List[ITuple], + pageSize: Int ): List[Int] = { var currentIndex = 1 var currentIndexPageCount = 0 @@ -94,14 +103,13 @@ object WebResultUpdate { final case class SetDeltaMode() extends WebOutputMode case class WebPaginationUpdate( - mode: PaginationMode, - totalNumTuples: Long, - dirtyPageIndices: List[Int] + mode: PaginationMode, + totalNumTuples: Long, + dirtyPageIndices: List[Int] ) extends WebResultUpdate case class WebDataUpdate(mode: WebOutputMode, table: List[ObjectNode], chartType: Option[String]) - extends WebResultUpdate - + extends WebResultUpdate /** * For SET_SNAPSHOT output mode: result is the latest snapshot @@ -111,8 +119,11 @@ object WebResultUpdate { * * Produces the WebResultUpdate to send to frontend from a result update from the engine. 
*/ - def convertWebResultUpdate(sink: ProgressiveSinkOpDesc, - oldTupleCount: Int, newTupleCount: Int): WebResultUpdate = { + def convertWebResultUpdate( + sink: ProgressiveSinkOpDesc, + oldTupleCount: Int, + newTupleCount: Int + ): WebResultUpdate = { val webOutputMode: WebOutputMode = { (sink.getOutputMode, sink.getChartType) match { // visualization sinks use its corresponding mode diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheChecker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheChecker.scala index 3529392ba36..8c362f98f0e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheChecker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowCacheChecker.scala @@ -23,11 +23,13 @@ class WorkflowCacheChecker(oldWorkflow: LogicalPlan, newWorkflow: LogicalPlan) { } // for each operator in the old workflow, add it to its own equivalence class - oldWorkflow.jgraphtDag.iterator().forEachRemaining(opId => { - val oldId = "old-" + opId - equivalenceClass.put(oldId, nextClassId) - nextClassId += 1 - }) + oldWorkflow.jgraphtDag + .iterator() + .forEachRemaining(opId => { + val oldId = "old-" + opId + equivalenceClass.put(oldId, nextClassId) + nextClassId += 1 + }) // for each operator in the new workflow // check if @@ -36,43 +38,50 @@ class WorkflowCacheChecker(oldWorkflow: LogicalPlan, newWorkflow: LogicalPlan) { // // if both conditions are met, then the two operators are equal, // else a new equivalence class is created - newWorkflow.jgraphtDag.iterator().forEachRemaining(opId => { - val newOp = newWorkflow.getOperator(opId) - val newOpUpstreamClasses = newWorkflow.getUpstream(opId) - .map(op => equivalenceClass("new-" + op.operatorID)) - val oldOp = oldWorkflow.operators.find(op => op.equals(newOp)).orNull + newWorkflow.jgraphtDag + .iterator() + .forEachRemaining(opId => { + val newOp = newWorkflow.getOperator(opId) + val newOpUpstreamClasses = newWorkflow + .getUpstream(opId) + .map(op => equivalenceClass("new-" + op.operatorID)) + val oldOp = oldWorkflow.operators.find(op => op.equals(newOp)).orNull - // check if the old workflow contains the same operator content - val newOpClassId = if (oldOp == null) { - getNextClassId() // operator not found, create a new class - } else{ - // check its inputs are all in the same equivalence class - val oldId = "old-" + oldOp.operatorID - val oldOpUpstreamClasses = oldWorkflow.getUpstream( oldOp.operatorID) - .map(op => equivalenceClass("old-" + op.operatorID)) - if (oldOpUpstreamClasses.equals(newOpUpstreamClasses)) { - equivalenceClass(oldId) // same equivalence class + // check if the old workflow contains the same operator content + val newOpClassId = if (oldOp == null) { + getNextClassId() // operator not found, create a new class } else { - getNextClassId() // inputs are no the same, new class + // check its inputs are all in the same equivalence class + val oldId = "old-" + oldOp.operatorID + val oldOpUpstreamClasses = oldWorkflow + .getUpstream(oldOp.operatorID) + .map(op => equivalenceClass("old-" + op.operatorID)) + if (oldOpUpstreamClasses.equals(newOpUpstreamClasses)) { + equivalenceClass(oldId) // same equivalence class + } else { + getNextClassId() // inputs are no the same, new class + } } - } - equivalenceClass.put("new-" + opId, newOpClassId) - }) - + equivalenceClass.put("new-" + opId, newOpClassId) + }) // for each cached operator in the old workflow, // check if it can be still used in the new workflow - 
oldWorkflow.terminalOperators.map(sinkOpId => { - val opId = oldWorkflow.getUpstream(sinkOpId).head.operatorID - val oldCachedOpId = "old-" + opId - // find its equivalence class - val oldClassId = equivalenceClass(oldCachedOpId) - // find the corresponding operator that can still use this cache - val newOpId = equivalenceClass.find(p => p._2 == oldClassId && p._1 != oldCachedOpId) - .map(p => p._1).orNull - if (newOpId == null) null else opId - }).filter(o => o != null).toSet + oldWorkflow.terminalOperators + .map(sinkOpId => { + val opId = oldWorkflow.getUpstream(sinkOpId).head.operatorID + val oldCachedOpId = "old-" + opId + // find its equivalence class + val oldClassId = equivalenceClass(oldCachedOpId) + // find the corresponding operator that can still use this cache + val newOpId = equivalenceClass + .find(p => p._2 == oldClassId && p._1 != oldCachedOpId) + .map(p => p._1) + .orNull + if (newOpId == null) null else opId + }) + .filter(o => o != null) + .toSet } } - diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala index c816a9374c5..e9f25e8b8a7 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowJobService.scala @@ -26,7 +26,7 @@ class WorkflowJobService( request: WorkflowExecuteRequest, errorHandler: Throwable => Unit, engineVersion: String, - lastCompletedLogicalPlan: LogicalPlan, + lastCompletedLogicalPlan: LogicalPlan ) extends SubscriptionManager with LazyLogging { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowService.scala index 31e36a6e950..0c2691819ed 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/WorkflowService.scala @@ -88,14 +88,18 @@ class WorkflowService( var lastCompletedLogicalPlan: LogicalPlan = null - jobService.subscribe { job: WorkflowJobService => { - job.stateStore.jobMetadataStore.registerDiffHandler { (oldState, newState) => { - if (oldState.state != COMPLETED && newState.state == COMPLETED) { - lastCompletedLogicalPlan = job.workflowCompiler.logicalPlan + jobService.subscribe { job: WorkflowJobService => + { + job.stateStore.jobMetadataStore.registerDiffHandler { (oldState, newState) => + { + if (oldState.state != COMPLETED && newState.state == COMPLETED) { + lastCompletedLogicalPlan = job.workflowCompiler.logicalPlan + } + Iterable.empty + } } - Iterable.empty - }} - }} + } + } addSubscription( wsInput.subscribe((evt: WorkflowExecuteRequest, uidOpt) => initJobService(evt, uidOpt)) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/storage/WorkflowResultStore.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/storage/WorkflowResultStore.scala index dfea2230128..3a71254ff4d 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/storage/WorkflowResultStore.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/storage/WorkflowResultStore.scala @@ -2,6 +2,6 @@ package edu.uci.ics.texera.web.storage case class OperatorResultMetadata(tupleCount: Int = 0, changeDetector: String = "") -case class WorkflowResultStore ( - resultInfo: Map[String, OperatorResultMetadata] = Map.empty +case class WorkflowResultStore( + resultInfo: Map[String, OperatorResultMetadata] = Map.empty ) diff 
--git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala index 2949712a99a..6bfd6cdcc80 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala @@ -108,7 +108,9 @@ case class LogicalPlan( def removeOperator(operatorId: String): LogicalPlan = { this.copy( operators.filter(o => o.operatorID != operatorId), - links.filter(l => l.origin.operatorID != operatorId && l.destination.operatorID != operatorId), + links.filter(l => + l.origin.operatorID != operatorId && l.destination.operatorID != operatorId + ), breakpoints.filter(b => b.operatorID != operatorId), opsToReuseCache.filter(c => c != operatorId) ) @@ -139,7 +141,7 @@ case class LogicalPlan( } def removeEdge( - edge: OperatorLink + edge: OperatorLink ): LogicalPlan = { val newLinks = links.filter(l => l != edge) this.copy(operators, newLinks, breakpoints, opsToReuseCache) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala index 5b87bfaebb0..4e26772a1a2 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/SinkInjectionTransformer.scala @@ -8,15 +8,16 @@ import edu.uci.ics.texera.workflow.operators.visualization.VisualizationOperator object SinkInjectionTransformer { def transform(pojo: LogicalPlanPojo): LogicalPlan = { - var logicalPlan = LogicalPlan(pojo.operators, pojo.links, pojo.breakpoints, pojo.opsToReuseResult) + var logicalPlan = + LogicalPlan(pojo.operators, pojo.links, pojo.breakpoints, pojo.opsToReuseResult) // for any terminal operator without a sink, add a sink val nonSinkTerminalOps = logicalPlan.getTerminalOperators.filter(opId => - ! logicalPlan.getOperator(opId).isInstanceOf[SinkOpDesc] + !logicalPlan.getOperator(opId).isInstanceOf[SinkOpDesc] ) // for any operators marked as view result without a sink, add a sink val viewResultOps = pojo.opsToViewResult.filter(opId => - ! 
logicalPlan.getDownstream(opId).exists(op => op.isInstanceOf[SinkOpDesc]) + !logicalPlan.getDownstream(opId).exists(op => op.isInstanceOf[SinkOpDesc]) ) val operatorsToAddSink = (nonSinkTerminalOps ++ viewResultOps).toSet @@ -31,7 +32,11 @@ object SinkInjectionTransformer { }) // check precondition: all the terminal operators should sinks now - assert(logicalPlan.getTerminalOperators.forall(o => logicalPlan.getOperator(o).isInstanceOf[SinkOpDesc])) + assert( + logicalPlan.getTerminalOperators.forall(o => + logicalPlan.getOperator(o).isInstanceOf[SinkOpDesc] + ) + ) // for each sink: // set the corresponding upstream ID and port diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCacheRewriter.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCacheRewriter.scala index cc7513644ef..c6fa4411d4a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCacheRewriter.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCacheRewriter.scala @@ -7,9 +7,9 @@ import edu.uci.ics.texera.workflow.operators.source.cache.CacheSourceOpDesc object WorkflowCacheRewriter { def transform( - logicalPlan: LogicalPlan, - storage: OpResultStorage, - availableCache: Set[String], + logicalPlan: LogicalPlan, + storage: OpResultStorage, + availableCache: Set[String] ): LogicalPlan = { var resultPlan = logicalPlan @@ -32,22 +32,35 @@ object WorkflowCacheRewriter { // replace the connection of all outgoing edges of opId with the cache val edgesToReplace = resultPlan.getDownstreamEdges(opId) edgesToReplace.foreach(e => { - resultPlan = resultPlan.removeEdge(e.origin.operatorID, e.destination.operatorID, - e.origin.portOrdinal, e.destination.portOrdinal) - resultPlan = resultPlan.addEdge(materializationReader.operatorID, e.destination.operatorID, - 0, e.destination.portOrdinal) + resultPlan = resultPlan.removeEdge( + e.origin.operatorID, + e.destination.operatorID, + e.origin.portOrdinal, + e.destination.portOrdinal + ) + resultPlan = resultPlan.addEdge( + materializationReader.operatorID, + e.destination.operatorID, + 0, + e.destination.portOrdinal + ) }) }) // operators that are no longer reachable by any sink don't need to run val allOperators = resultPlan.operators.map(op => op.operatorID).toSet - val sinkOps = resultPlan.operators.filter(op => op.isInstanceOf[SinkOpDesc]).map(o => o.operatorID) + val sinkOps = + resultPlan.operators.filter(op => op.isInstanceOf[SinkOpDesc]).map(o => o.operatorID) val usefulOperators = sinkOps ++ sinkOps.flatMap(o => resultPlan.getAncestorOpIds(o)).toSet - allOperators.diff(usefulOperators.toSet).foreach(o => { - resultPlan = resultPlan.removeOperator(o) - }) + allOperators + .diff(usefulOperators.toSet) + .foreach(o => { + resultPlan = resultPlan.removeOperator(o) + }) - assert(resultPlan.terminalOperators.forall(o => resultPlan.getOperator(o).isInstanceOf[SinkOpDesc])) + assert( + resultPlan.terminalOperators.forall(o => resultPlan.getOperator(o).isInstanceOf[SinkOpDesc]) + ) resultPlan } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala index 9d0f0b30ec6..76b0b0281cf 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala @@ -35,10 +35,10 @@ 
class WorkflowCompiler(val logicalPlan: LogicalPlan, val context: WorkflowContex .map(o => (o._1, o._2.validate().toSet)) .filter(o => o._2.nonEmpty) - private def assignSinkStorage( - logicalPlan: LogicalPlan, storage: OpResultStorage, - reuseStorageSet: Set[String] = Set() + logicalPlan: LogicalPlan, + storage: OpResultStorage, + reuseStorageSet: Set[String] = Set() ) = { // assign storage to texera-managed sinks before generating exec config logicalPlan.operators.foreach { @@ -66,13 +66,14 @@ class WorkflowCompiler(val logicalPlan: LogicalPlan, val context: WorkflowContex } def amberWorkflow( - workflowId: WorkflowIdentity, - opResultStorage: OpResultStorage, + workflowId: WorkflowIdentity, + opResultStorage: OpResultStorage, lastCompletedJob: LogicalPlan = null ): Workflow = { val cacheReuses = new WorkflowCacheChecker(lastCompletedJob, logicalPlan).getValidCacheReuse() val opsToReuseCache = cacheReuses.intersect(logicalPlan.opsToReuseCache.toSet) - val rewrittenLogicalPlan = WorkflowCacheRewriter.transform(logicalPlan, opResultStorage, opsToReuseCache) + val rewrittenLogicalPlan = + WorkflowCacheRewriter.transform(logicalPlan, opResultStorage, opsToReuseCache) rewrittenLogicalPlan.operatorMap.values.foreach(initOperator) assignSinkStorage(logicalPlan, opResultStorage, opsToReuseCache) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/scan/ScanSourceOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/scan/ScanSourceOpDesc.scala index b2dc782fbaf..f6c6bbe6e2d 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/scan/ScanSourceOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/scan/ScanSourceOpDesc.scala @@ -5,7 +5,11 @@ import com.fasterxml.jackson.databind.annotation.JsonDeserialize import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle import edu.uci.ics.texera.web.resource.dashboard.user.file.UserFileAccessResource import edu.uci.ics.texera.workflow.common.WorkflowContext -import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo, OutputPort} +import edu.uci.ics.texera.workflow.common.metadata.{ + OperatorGroupConstants, + OperatorInfo, + OutputPort +} import edu.uci.ics.texera.workflow.common.operators.source.SourceOperatorDescriptor import edu.uci.ics.texera.workflow.common.tuple.schema.Schema import org.apache.commons.lang3.builder.EqualsBuilder @@ -94,6 +98,6 @@ abstract class ScanSourceOpDesc extends SourceOperatorDescriptor { def inferSchema(): Schema - override def equals(that: Any): Boolean = EqualsBuilder.reflectionEquals(this, that, - "context", "filePath") + override def equals(that: Any): Boolean = + EqualsBuilder.reflectionEquals(this, that, "context", "filePath") } diff --git a/core/amber/src/test/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriterSpec.scala b/core/amber/src/test/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriterSpec.scala index 9c7862ceabe..617944674a8 100644 --- a/core/amber/src/test/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriterSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowRewriterSpec.scala @@ -83,7 +83,12 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val breakpointInfo = BreakpointInfo(sourceOperator.operatorID, CountBreakpoint(0)) breakpoints += breakpointInfo - val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList, 
List(sourceOperator.operatorID)) + val logicalPlan = LogicalPlan( + operators.toList, + links.toList, + breakpoints.toList, + List(sourceOperator.operatorID) + ) val tuples = mutable.MutableList[Tuple]() val cacheSourceOperator = new CacheSourceOpDesc(uuid, opResultStorage) @@ -140,7 +145,12 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val destination = OperatorPort(sinkOperator.operatorID, 0) links += OperatorLink(origin, destination) - val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList, List(sourceOperator.operatorID)) + val logicalPlan = LogicalPlan( + operators.toList, + links.toList, + breakpoints.toList, + List(sourceOperator.operatorID) + ) val cachedOperators = mutable.HashMap[String, OperatorDescriptor]() val cacheSourceOperators = mutable.HashMap[String, CacheSourceOpDesc]() @@ -196,7 +206,12 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val breakpointInfo = BreakpointInfo(sourceOperator.operatorID, CountBreakpoint(0)) breakpoints += breakpointInfo - val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList, List(sourceOperator.operatorID)) + val logicalPlan = LogicalPlan( + operators.toList, + links.toList, + breakpoints.toList, + List(sourceOperator.operatorID) + ) val cachedOperators = mutable.HashMap[String, OperatorDescriptor]() val cacheSourceOperators = mutable.HashMap[String, CacheSourceOpDesc]() @@ -246,7 +261,6 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val destination2 = OperatorPort(sinkOperator.operatorID, 0) links += OperatorLink(origin2, destination2) - val tuples = mutable.MutableList[Tuple]() val uuid = UUID.randomUUID().toString val cacheSourceOperator = new CacheSourceOpDesc(uuid, opResultStorage) @@ -257,7 +271,8 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val cachedOperatorID = filterOperator.operatorID operatorOutputCache += ((cachedOperatorID, tuples)) - val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList, List(cachedOperatorID)) + val logicalPlan = + LogicalPlan(operators.toList, links.toList, breakpoints.toList, List(cachedOperatorID)) val cachedOperators = mutable.HashMap[String, OperatorDescriptor]() cachedOperators += ((cachedOperatorID, operatorToString(filterOperator))) @@ -327,7 +342,6 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val breakpointInfo = BreakpointInfo(sourceOperator.operatorID, CountBreakpoint(0)) breakpoints += breakpointInfo - val tuples = mutable.MutableList[Tuple]() val uuid = UUID.randomUUID().toString val cacheSourceOperator = new CacheSourceOpDesc(uuid, opResultStorage) @@ -336,7 +350,8 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val cachedOperatorID = filterOperator.operatorID - val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList, List(cachedOperatorID)) + val logicalPlan = + LogicalPlan(operators.toList, links.toList, breakpoints.toList, List(cachedOperatorID)) operatorOutputCache += ((cachedOperatorID, tuples)) val cachedOperators = mutable.HashMap[String, OperatorDescriptor]() @@ -409,7 +424,6 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { links += create00Link(filterOperator2, filterOperator3) links += create00Link(filterOperator3, sinkOperator) - val uuidForFilter3 = UUID.randomUUID().toString val cacheSourceForFilter3 = new CacheSourceOpDesc(uuidForFilter3, opResultStorage) val cacheSinkForFilter3 = new ProgressiveSinkOpDesc() 
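(Aside for readers of these spec changes: the end-to-end flow that the result-reuse feature in this patch implements, mirroring the new WorkflowCompiler.amberWorkflow logic, is roughly the sketch below. This is illustrative only; lastCompletedPlan, newPlan and opResultStorage are assumed names in scope, not identifiers taken from the patch.)

    // compare the newly submitted plan against the plan of the last completed job
    val validCaches: Set[String] =
      new WorkflowCacheChecker(lastCompletedPlan, newPlan).getValidCacheReuse()

    // reuse only results that the user marked AND that the checker still considers valid
    val opsToReuse: Set[String] = validCaches.intersect(newPlan.opsToReuseCache.toSet)

    // rewrite the plan: reused operators are replaced by CacheSourceOpDesc readers,
    // and operators no longer reachable from any sink are dropped
    val rewrittenPlan: LogicalPlan =
      WorkflowCacheRewriter.transform(newPlan, opResultStorage, opsToReuse)
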
@@ -423,9 +437,12 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val cacheSinkOperators = mutable.HashMap[String, ProgressiveSinkOpDesc]() cacheSinkOperators += ((cachedOperatorIDForFilter3, cacheSinkForFilter3)) - val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList, - List(cachedOperatorIDForFilter3, filterOperator.operatorID)) - + val logicalPlan = LogicalPlan( + operators.toList, + links.toList, + breakpoints.toList, + List(cachedOperatorIDForFilter3, filterOperator.operatorID) + ) rewriter = new WorkflowRewriter( logicalPlan, @@ -495,7 +512,6 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { links += create00Link(filterOperator2, filterOperator3) links += create00Link(filterOperator3, sinkOperator) - val uuidForFilter = UUID.randomUUID().toString val cacheSourceForFilter = new CacheSourceOpDesc(uuidForFilter, opResultStorage) val cacheSinkForFilter = new ProgressiveSinkOpDesc() @@ -509,8 +525,12 @@ class WorkflowRewriterSpec extends AnyFlatSpec with BeforeAndAfter { val cacheSinkOperators = mutable.HashMap[String, ProgressiveSinkOpDesc]() cacheSinkOperators += ((cachedOperatorIDForFilter, cacheSinkForFilter)) - val logicalPlan = LogicalPlan(operators.toList, links.toList, breakpoints.toList, - List(cachedOperatorIDForFilter, filterOperator3.operatorID)) + val logicalPlan = LogicalPlan( + operators.toList, + links.toList, + breakpoints.toList, + List(cachedOperatorIDForFilter, filterOperator3.operatorID) + ) rewriter = new WorkflowRewriter( logicalPlan, diff --git a/core/new-gui/src/app/workspace/component/navigation/navigation.component.html b/core/new-gui/src/app/workspace/component/navigation/navigation.component.html index c346a779cca..e9450911d4b 100644 --- a/core/new-gui/src/app/workspace/component/navigation/navigation.component.html +++ b/core/new-gui/src/app/workspace/component/navigation/navigation.component.html @@ -282,27 +282,49 @@ nzType="stop"> + +
  • cache output → view result
  • + remove view result
  • + reuse result
  • remove cache → remove reusing result
  • { + event.newViewResultOps.concat(event.newUnviewResultOps).forEach(opID => { + const op = this.workflowActionService.getTexeraGraph().getOperator(opID); + this.jointUIService.changeOperatorViewResultStatus(this.getJointPaper(), op, op.viewResult); + }); + }); + } + private registerOperatorDisplayNameChangeHandler(): void { this.workflowActionService diff --git a/core/new-gui/src/app/workspace/component/workspace.component.ts b/core/new-gui/src/app/workspace/component/workspace.component.ts index 896769c3a93..36663c32580 100644 --- a/core/new-gui/src/app/workspace/component/workspace.component.ts +++ b/core/new-gui/src/app/workspace/component/workspace.component.ts @@ -18,7 +18,7 @@ import { NzMessageService } from "ng-zorro-antd/message"; import { WorkflowConsoleService } from "../service/workflow-console/workflow-console.service"; import { debounceTime, distinctUntilChanged, filter, switchMap } from "rxjs/operators"; import { UntilDestroy, untilDestroyed } from "@ngneat/until-destroy"; -import { OperatorCacheStatusService } from "../service/workflow-status/operator-cache-status.service"; +import { OperatorReuseStatusService } from "../service/workflow-status/operator-cache-status.service"; import { of } from "rxjs"; import { isDefined } from "../../common/util/predicate"; import { NotificationService } from "src/app/common/service/notification/notification.service"; @@ -50,7 +50,7 @@ export class WorkspaceComponent implements AfterViewInit, OnInit, OnDestroy { private schemaPropagationService: SchemaPropagationService, private autoAttributeCorrectionService: AutoAttributeCorrectionService, private undoRedoService: UndoRedoService, - private operatorCacheStatus: OperatorCacheStatusService, + private operatorCacheStatus: OperatorReuseStatusService, private workflowCacheService: WorkflowCacheService, private workflowPersistService: WorkflowPersistService, private workflowWebsocketService: WorkflowWebsocketService, diff --git a/core/new-gui/src/app/workspace/service/execute-workflow/execute-workflow.service.ts b/core/new-gui/src/app/workspace/service/execute-workflow/execute-workflow.service.ts index 4f7ab9410a1..4005e45cf32 100644 --- a/core/new-gui/src/app/workspace/service/execute-workflow/execute-workflow.service.ts +++ b/core/new-gui/src/app/workspace/service/execute-workflow/execute-workflow.service.ts @@ -441,11 +441,15 @@ export class ExecuteWorkflowService { ExecuteWorkflowService.transformBreakpoint(workflowGraph, e[0], e[1]) ); - const cachedOperatorIds: string[] = Array.from(workflowGraph.getCachedOperators()).filter( + const opsToViewResult: string[] = Array.from(workflowGraph.getCachedOperators()).filter( op => !workflowGraph.isOperatorDisabled(op) ); - return { operators, links, breakpoints, cachedOperatorIds }; + const opsToReuseResult: string[] = Array.from(workflowGraph.getOperatorsMarkedForReuseResult()).filter( + op => !workflowGraph.isOperatorDisabled(op) + ); + + return { operators, links, breakpoints, opsToViewResult, opsToReuseResult }; } public static transformBreakpoint( diff --git a/core/new-gui/src/app/workspace/service/execute-workflow/mock-workflow-plan.ts b/core/new-gui/src/app/workspace/service/execute-workflow/mock-workflow-plan.ts index 2327ec24a39..bba23432e00 100644 --- a/core/new-gui/src/app/workspace/service/execute-workflow/mock-workflow-plan.ts +++ b/core/new-gui/src/app/workspace/service/execute-workflow/mock-workflow-plan.ts @@ -42,7 +42,6 @@ export const mockLogicalPlan_scan_result: LogicalPlan = { }, ], breakpoints: [], - cachedOperatorIds: [], 
}; export const mockWorkflowPlan_scan_sentiment_result: WorkflowGraph = new WorkflowGraph( @@ -95,5 +94,4 @@ export const mockLogicalPlan_scan_sentiment_result: LogicalPlan = { }, ], breakpoints: [], - cachedOperatorIds: [], }; diff --git a/core/new-gui/src/app/workspace/service/joint-ui/joint-ui.service.ts b/core/new-gui/src/app/workspace/service/joint-ui/joint-ui.service.ts index edbf5b2bc4c..b72a6ea4cbb 100644 --- a/core/new-gui/src/app/workspace/service/joint-ui/joint-ui.service.ts +++ b/core/new-gui/src/app/workspace/service/joint-ui/joint-ui.service.ts @@ -126,8 +126,9 @@ export const sourceOperatorHandle = "M 0 0 L 0 8 L 8 8 L 8 0 z"; */ export const targetOperatorHandle = "M 12 0 L 0 6 L 12 12 z"; -export const operatorCacheTextClass = "texera-operator-result-cache-text"; -export const operatorCacheIconClass = "texera-operator-result-cache-icon"; +export const operatorReuseCacheTextClass = "texera-operator-result-reuse-text"; +export const operatorReuseCacheIconClass = "texera-operator-result-reuse-icon"; +export const operatorViewResultIconClass = "texera-operator-view-result-icon"; export const operatorStateClass = "texera-operator-state"; export const operatorProcessedCountClass = "texera-operator-processed-count"; export const operatorOutputCountClass = "texera-operator-output-count"; @@ -156,10 +157,11 @@ class TexeraCustomJointElement extends joint.shapes.devs.Model { - + - + + @@ -504,7 +506,17 @@ export class JointUIService { jointPaper.getModelById(operator.operatorID).attr("rect.body/fill", JointUIService.getOperatorFillColor(operator)); } - public changeOperatorCacheStatus( + public changeOperatorViewResultStatus( + jointPaper: joint.dia.Paper, + operator: OperatorPredicate, + viewResult?: boolean + ): void { + const icon = JointUIService.getOperatorViewResultIcon(operator); + jointPaper.getModelById(operator.operatorID).attr( + `.${operatorViewResultIconClass}/xlink:href`, icon); + } + + public changeOperatorReuseCacheStatus( jointPaper: joint.dia.Paper, operator: OperatorPredicate, cacheStatus?: OperatorResultCacheStatus @@ -512,10 +524,10 @@ export class JointUIService { const cacheText = JointUIService.getOperatorCacheDisplayText(operator, cacheStatus); const cacheIcon = JointUIService.getOperatorCacheIcon(operator, cacheStatus); - const cacheIndicatorText = cacheText === "" ? "" : "cache"; - jointPaper.getModelById(operator.operatorID).attr(`.${operatorCacheTextClass}/text`, cacheIndicatorText); - jointPaper.getModelById(operator.operatorID).attr(`.${operatorCacheIconClass}/xlink:href`, cacheIcon); - jointPaper.getModelById(operator.operatorID).attr(`.${operatorCacheIconClass}/title`, cacheText); + // const cacheIndicatorText = cacheText === "" ? "" : "cache"; + // jointPaper.getModelById(operator.operatorID).attr(`.${operatorCacheTextClass}/text`, cacheIndicatorText); + jointPaper.getModelById(operator.operatorID).attr(`.${operatorReuseCacheIconClass}/xlink:href`, cacheIcon); + // jointPaper.getModelById(operator.operatorID).attr(`.${operatorCacheIconClass}/title`, cacheText); } public changeOperatorJointDisplayName( @@ -906,7 +918,7 @@ export class JointUIService { "x-alignment": "middle", "y-alignment": "middle", }, - ".texera-operator-result-cache-text": { + ".texera-operator-result-reuse-text": { text: JointUIService.getOperatorCacheDisplayText(operator) === "" ? 
"" : "cache", fill: "#595959", "font-size": "14px", @@ -917,12 +929,22 @@ export class JointUIService { "y-alignment": "middle", "x-alignment": "middle", }, - ".texera-operator-result-cache-icon": { + ".texera-operator-result-reuse-icon": { "xlink:href": JointUIService.getOperatorCacheIcon(operator), - title: JointUIService.getOperatorCacheDisplayText(operator), + // title: JointUIService.getOperatorCacheDisplayText(operator), width: 40, height: 40, "ref-x": 75, + "ref-y": 20, + ref: "rect.body", + "x-alignment": "middle", + "y-alignment": "middle", + }, + ".texera-operator-view-result-icon": { + "xlink:href": JointUIService.getOperatorViewResultIcon(operator), + width: 20, + height: 20, + "ref-x": 75, "ref-y": 50, ref: "rect.body", "x-alignment": "middle", @@ -941,30 +963,38 @@ export class JointUIService { operator: OperatorPredicate, cacheStatus?: OperatorResultCacheStatus ): string { - if (cacheStatus && cacheStatus !== "cache not enabled") { - return cacheStatus; + if (cacheStatus === undefined || !operator.markedForReuse) { + return ""; } - const isCached = operator.isCached ?? false; - return isCached ? "to be cached" : ""; + return cacheStatus; } public static getOperatorCacheIcon(operator: OperatorPredicate, cacheStatus?: OperatorResultCacheStatus): string { - if (cacheStatus && cacheStatus !== "cache not enabled") { - if (cacheStatus === "cache valid") { - return "assets/svg/operator-result-cache-successful.svg"; - } else if (cacheStatus === "cache invalid") { - return "assets/svg/operator-result-cache-invalid.svg"; - } else { - const _exhaustiveCheck: never = cacheStatus; - return ""; - } + console.log('getting operator cache icon for ' + operator.operatorID) + console.log('cache status: ' + cacheStatus) + if (!operator.markedForReuse) { + return ""; + } + if (cacheStatus === "cache valid") { + return "assets/svg/operator-reuse-cache-valid.svg" + } else { + return "assets/svg/operator-reuse-cache-invalid.svg" + } + // if (cacheStatus === "cache valid") { + // return "assets/svg/operator-result-cache-successful.svg"; + // } else if (cacheStatus === "cache invalid") { + // return "assets/svg/operator-result-cache-invalid.svg"; + // } else { + // const _exhaustiveCheck: never = cacheStatus; + // return ""; + // } + } + + public static getOperatorViewResultIcon(operator: OperatorPredicate): string { + if (operator.viewResult) { + return "assets/svg/operator-view-result.svg" } else { - const isCached = operator.isCached ?? 
false; - if (isCached) { - return "assets/svg/operator-result-cache-to-be-cached.svg"; - } else { - return ""; - } + return ""; } } diff --git a/core/new-gui/src/app/workspace/service/operator-menu/operator-menu.service.ts b/core/new-gui/src/app/workspace/service/operator-menu/operator-menu.service.ts index e46dea51e80..69834102f67 100644 --- a/core/new-gui/src/app/workspace/service/operator-menu/operator-menu.service.ts +++ b/core/new-gui/src/app/workspace/service/operator-menu/operator-menu.service.ts @@ -49,11 +49,12 @@ export class OperatorMenuService { public isDisableOperatorClickable: boolean = false; public isDisableOperator: boolean = true; - // whether the cache-operator-button should be enabled - public operatorCacheEnabled: boolean = environment.operatorCacheEnabled; public isCacheOperatorClickable: boolean = false; public isCacheOperator: boolean = true; + public isReuseResultClickable: boolean = false; + public isMarkForReuse: boolean = true; + public readonly COPY_OFFSET = 20; constructor( @@ -63,6 +64,7 @@ export class OperatorMenuService { ) { this.handleDisableOperatorStatusChange(); this.handleCacheOperatorStatusChange(); + this.handleReuseOperatorResultStatusChange(); merge( this.workflowActionService.getJointGraphWrapper().getJointOperatorHighlightStream(), @@ -129,6 +131,20 @@ export class OperatorMenuService { } } + public reuseResultHighlightedOperator(): void { + const effectiveHighlightedOperatorsExcludeSink = this.effectivelyHighlightedOperators.value.filter( + op => !isSink(this.workflowActionService.getTexeraGraph().getOperator(op)) + ); + + console.log("calling mark reuse") + console.log(effectiveHighlightedOperatorsExcludeSink) + if (this.isMarkForReuse) { + this.workflowActionService.markReuseResults(effectiveHighlightedOperatorsExcludeSink); + } else { + this.workflowActionService.removeMarkReuseResults(effectiveHighlightedOperatorsExcludeSink); + } + } + /** * Updates the status of the disable operator icon: * If all selected operators are disabled, then click it will re-enable the operators @@ -154,7 +170,7 @@ export class OperatorMenuService { handleCacheOperatorStatusChange() { merge( this.effectivelyHighlightedOperators, - this.workflowActionService.getTexeraGraph().getCachedOperatorsChangedStream(), + this.workflowActionService.getTexeraGraph().getViewResultOperatorsChangedStream(), this.workflowActionService.getWorkflowModificationEnabledStream() ).subscribe(event => { const effectiveHighlightedOperatorsExcludeSink = this.effectivelyHighlightedOperators.value.filter( @@ -172,6 +188,27 @@ export class OperatorMenuService { }); } + handleReuseOperatorResultStatusChange() { + merge( + this.effectivelyHighlightedOperators, + this.workflowActionService.getTexeraGraph().getReuseCacheOperatorsChangedStream(), + this.workflowActionService.getWorkflowModificationEnabledStream() + ).subscribe(event => { + const effectiveHighlightedOperatorsExcludeSink = this.effectivelyHighlightedOperators.value.filter( + op => !isSink(this.workflowActionService.getTexeraGraph().getOperator(op)) + ); + + const allMarkedForReuse = effectiveHighlightedOperatorsExcludeSink.every(op => + this.workflowActionService.getTexeraGraph().isMarkedForReuseResult(op) + ); + + this.isMarkForReuse = !allMarkedForReuse; + this.isReuseResultClickable = + effectiveHighlightedOperatorsExcludeSink.length !== 0 && + this.workflowActionService.checkWorkflowModificationEnabled(); + }); + } + /** * saves highlighted elements to the system clipboard */ diff --git 
a/core/new-gui/src/app/workspace/service/workflow-graph/model/shared-model-change-handler.ts b/core/new-gui/src/app/workspace/service/workflow-graph/model/shared-model-change-handler.ts index 4e6e06e4619..7dafba1f5aa 100644 --- a/core/new-gui/src/app/workspace/service/workflow-graph/model/shared-model-change-handler.ts +++ b/core/new-gui/src/app/workspace/service/workflow-graph/model/shared-model-change-handler.ts @@ -249,7 +249,7 @@ export class SharedModelChangeHandler { * - customDisplayName * - operatorProperties * - operatorPorts - * - isCached + * - viewResult * - isDisabled * @private */ @@ -262,19 +262,34 @@ export class SharedModelChangeHandler { // Changes one level below the operatorPredicate type for (const entry of event.changes.keys.entries()) { const contentKey = entry[0]; - if (contentKey === "isCached") { + if (contentKey === "viewResult") { const newCachedStatus = this.texeraGraph.sharedModel.operatorIDMap .get(operatorID) - ?.get("isCached") as boolean; + ?.get("viewResult") as boolean; if (newCachedStatus) { - this.texeraGraph.cachedOperatorChangedSubject.next({ - newCached: [operatorID], - newUnCached: [], + this.texeraGraph.viewResultOperatorChangedSubject.next({ + newViewResultOps: [operatorID], + newUnviewResultOps: [], }); } else { - this.texeraGraph.cachedOperatorChangedSubject.next({ - newCached: [], - newUnCached: [operatorID], + this.texeraGraph.viewResultOperatorChangedSubject.next({ + newViewResultOps: [], + newUnviewResultOps: [operatorID], + }); + } + } else if (contentKey === "markedForReuse") { + const newReuseCacheOps = this.texeraGraph.sharedModel.operatorIDMap + .get(operatorID) + ?.get("markedForReuse") as boolean; + if (newReuseCacheOps) { + this.texeraGraph.reuseOperatorChangedSubject.next({ + newReuseCacheOps: [operatorID], + newUnreuseCacheOps: [], + }); + } else { + this.texeraGraph.reuseOperatorChangedSubject.next({ + newReuseCacheOps: [], + newUnreuseCacheOps: [operatorID], }); } } else if (contentKey === "isDisabled") { diff --git a/core/new-gui/src/app/workspace/service/workflow-graph/model/workflow-action.service.ts b/core/new-gui/src/app/workspace/service/workflow-graph/model/workflow-action.service.ts index 31be6da5457..a6015d10420 100644 --- a/core/new-gui/src/app/workspace/service/workflow-graph/model/workflow-action.service.ts +++ b/core/new-gui/src/app/workspace/service/workflow-graph/model/workflow-action.service.ts @@ -548,6 +548,22 @@ export class WorkflowActionService { }); } + public markReuseResults(ops: readonly string[]): void { + this.texeraGraph.bundleActions(() => { + ops.forEach(op => { + this.getTexeraGraph().markReuseResult(op); + }); + }) + } + + public removeMarkReuseResults(ops: readonly string[]): void { + this.texeraGraph.bundleActions(() => { + ops.forEach(op => { + this.getTexeraGraph().removeMarkReuseResult(op); + }); + }) + } + public cacheOperators(ops: readonly string[]): void { this.texeraGraph.bundleActions(() => { ops.forEach(op => { @@ -685,7 +701,8 @@ export class WorkflowActionService { this.getTexeraGraph().getCommentBoxAddCommentStream(), this.getTexeraGraph().getCommentBoxDeleteCommentStream(), this.getTexeraGraph().getCommentBoxEditCommentStream(), - this.getTexeraGraph().getCachedOperatorsChangedStream(), + this.getTexeraGraph().getViewResultOperatorsChangedStream(), + this.getTexeraGraph().getReuseCacheOperatorsChangedStream(), this.getTexeraGraph().getOperatorDisplayNameChangedStream(), this.getTexeraGraph().getOperatorVersionChangedStream(), 
this.getTexeraGraph().getPortDisplayNameChangedSubject(), diff --git a/core/new-gui/src/app/workspace/service/workflow-graph/model/workflow-graph.ts b/core/new-gui/src/app/workspace/service/workflow-graph/model/workflow-graph.ts index 7cb663417d3..cb66a4fad22 100644 --- a/core/new-gui/src/app/workspace/service/workflow-graph/model/workflow-graph.ts +++ b/core/new-gui/src/app/workspace/service/workflow-graph/model/workflow-graph.ts @@ -88,9 +88,13 @@ export class WorkflowGraph { newDisabled: string[]; newEnabled: string[]; }>(); - public readonly cachedOperatorChangedSubject = new Subject<{ - newCached: string[]; - newUnCached: string[]; + public readonly viewResultOperatorChangedSubject = new Subject<{ + newViewResultOps: string[]; + newUnviewResultOps: string[]; + }>(); + public readonly reuseOperatorChangedSubject = new Subject<{ + newReuseCacheOps: string[]; + newUnreuseCacheOps: string[] }>(); public readonly operatorDisplayNameChangedSubject = new Subject<{ operatorID: string; @@ -441,7 +445,7 @@ export class WorkflowGraph { if (this.isOperatorCached(operatorID)) { return; } - this.sharedModel.operatorIDMap.get(operatorID)?.set("isCached", true); + this.sharedModel.operatorIDMap.get(operatorID)?.set("viewResult", true); } /** @@ -456,7 +460,7 @@ export class WorkflowGraph { if (!this.isOperatorCached(operatorID)) { return; } - this.sharedModel.operatorIDMap.get(operatorID)?.set("isCached", false); + this.sharedModel.operatorIDMap.get(operatorID)?.set("viewResult", false); } /** @@ -468,7 +472,7 @@ export class WorkflowGraph { if (!operator) { throw new Error(`operator with ID ${operatorID} doesn't exist`); } - return operator.isCached ?? false; + return operator.viewResult ?? false; } public getCachedOperators(): ReadonlySet { @@ -479,6 +483,60 @@ export class WorkflowGraph { ); } + /** + * Changes markedForReuse status which is an atomic boolean value as opposed to y-type data. + * @param operatorID + */ + public markReuseResult(operatorID: string): void { + const operator = this.getOperator(operatorID); + if (!operator) { + throw new Error(`operator with ID ${operatorID} doesn't exist`); + } + if (isSink(operator)) { + return; + } + if (this.isMarkedForReuseResult(operatorID)) { + return; + } + console.log("seeting marked for reuse in shared model") + this.sharedModel.operatorIDMap.get(operatorID)?.set("markedForReuse", true); + } + + /** + * Changes markedForReuse status which is an atomic boolean value as opposed to y-type data. + * @param operatorID + */ + public removeMarkReuseResult(operatorID: string): void { + const operator = this.getOperator(operatorID); + if (!operator) { + throw new Error(`operator with ID ${operatorID} doesn't exist`); + } + if (!this.isMarkedForReuseResult(operatorID)) { + return; + } + this.sharedModel.operatorIDMap.get(operatorID)?.set("markedForReuse", false); + } + + /** + * This method gets this status from readonly object version of the operator data as opposed to y-type data. + * @param operatorID + */ + public isMarkedForReuseResult(operatorID: string): boolean { + const operator = this.getOperator(operatorID); + if (!operator) { + throw new Error(`operator with ID ${operatorID} doesn't exist`); + } + return operator.markedForReuse ?? false; + } + + public getOperatorsMarkedForReuseResult(): ReadonlySet { + return new Set( + Array.from(this.sharedModel.operatorIDMap.keys() as IterableIterator).filter(op => + this.isMarkedForReuseResult(op) + ) + ); + } + /** * Returns whether the operator exists in the graph. 
* @param operatorID operator ID @@ -874,11 +932,18 @@ export class WorkflowGraph { return this.commentBoxEditCommentSubject.asObservable(); } - public getCachedOperatorsChangedStream(): Observable<{ - newCached: ReadonlyArray; - newUnCached: ReadonlyArray; + public getViewResultOperatorsChangedStream(): Observable<{ + newViewResultOps: ReadonlyArray; + newUnviewResultOps: ReadonlyArray; + }> { + return this.viewResultOperatorChangedSubject.asObservable(); + } + + public getReuseCacheOperatorsChangedStream(): Observable<{ + newReuseCacheOps: ReadonlyArray; + newUnreuseCacheOps: ReadonlyArray; }> { - return this.cachedOperatorChangedSubject.asObservable(); + return this.reuseOperatorChangedSubject.asObservable(); } public getOperatorDisplayNameChangedStream(): Observable<{ diff --git a/core/new-gui/src/app/workspace/service/workflow-status/operator-cache-status.service.spec.ts b/core/new-gui/src/app/workspace/service/workflow-status/operator-cache-status.service.spec.ts index b1c749d0795..9789a1642a0 100644 --- a/core/new-gui/src/app/workspace/service/workflow-status/operator-cache-status.service.spec.ts +++ b/core/new-gui/src/app/workspace/service/workflow-status/operator-cache-status.service.spec.ts @@ -2,10 +2,10 @@ import { TestBed } from "@angular/core/testing"; import { OperatorMetadataService } from "../operator-metadata/operator-metadata.service"; import { StubOperatorMetadataService } from "../operator-metadata/stub-operator-metadata.service"; -import { OperatorCacheStatusService } from "./operator-cache-status.service"; +import { OperatorReuseStatusService } from "./operator-cache-status.service"; xdescribe("OperatorCacheStatusService", () => { - let service: OperatorCacheStatusService; + let service: OperatorReuseStatusService; beforeEach(() => { TestBed.configureTestingModule({ @@ -16,7 +16,7 @@ xdescribe("OperatorCacheStatusService", () => { }, ], }); - service = TestBed.inject(OperatorCacheStatusService); + service = TestBed.inject(OperatorReuseStatusService); }); it("should be created", () => { diff --git a/core/new-gui/src/app/workspace/service/workflow-status/operator-cache-status.service.ts b/core/new-gui/src/app/workspace/service/workflow-status/operator-cache-status.service.ts index 2e4eb25b33b..85a98f05035 100644 --- a/core/new-gui/src/app/workspace/service/workflow-status/operator-cache-status.service.ts +++ b/core/new-gui/src/app/workspace/service/workflow-status/operator-cache-status.service.ts @@ -2,30 +2,29 @@ import { Injectable } from "@angular/core"; import { WorkflowActionService } from "../workflow-graph/model/workflow-action.service"; import { WorkflowWebsocketService } from "../workflow-websocket/workflow-websocket.service"; import { SCHEMA_PROPAGATION_DEBOUNCE_TIME_MS } from "../dynamic-schema/schema-propagation/schema-propagation.service"; -import { debounceTime } from "rxjs/operators"; +import { debounceTime, filter } from "rxjs/operators"; import { ExecuteWorkflowService } from "../execute-workflow/execute-workflow.service"; import { merge } from "rxjs"; import { JointUIService } from "../joint-ui/joint-ui.service"; import { environment } from "src/environments/environment"; +import { ExecutionState } from "../../types/execute-workflow.interface"; @Injectable({ providedIn: "root", }) -export class OperatorCacheStatusService { +export class OperatorReuseStatusService { constructor( private jointUIService: JointUIService, private workflowActionService: WorkflowActionService, - private workflowWebsocketService: WorkflowWebsocketService + private 
workflowWebsocketService: WorkflowWebsocketService, + private executeWorkflowService: ExecuteWorkflowService ) { - if (!environment.operatorCacheEnabled) { - return; - } this.registerRequestCacheStatusUpdate(); this.registerHandleCacheStatusUpdate(); } /** - * Requests cache status (invalid/valid/toBeCached) when workflow is changed from the engine + * Requests cache status (invalid/valid) when workflow is changed from the engine * for example, when operator is updated, the cache status might be invalidated */ private registerRequestCacheStatusUpdate() { @@ -37,7 +36,10 @@ export class OperatorCacheStatusService { .getOperatorPropertyChangeStream() .pipe(debounceTime(SCHEMA_PROPAGATION_DEBOUNCE_TIME_MS)), this.workflowActionService.getTexeraGraph().getDisabledOperatorsChangedStream(), - this.workflowActionService.getTexeraGraph().getCachedOperatorsChangedStream() + this.workflowActionService.getTexeraGraph().getReuseCacheOperatorsChangedStream(), + this.executeWorkflowService.getExecutionStateStream().pipe(filter(evt => + evt.previous.state !== ExecutionState.Completed && evt.current.state == ExecutionState.Completed + )) ).subscribe(() => { const workflow = ExecuteWorkflowService.getLogicalPlanRequest(this.workflowActionService.getTexeraGraph()); this.workflowWebsocketService.send("CacheStatusUpdateRequest", workflow); @@ -50,17 +52,17 @@ export class OperatorCacheStatusService { private registerHandleCacheStatusUpdate() { this.workflowActionService .getTexeraGraph() - .getCachedOperatorsChangedStream() + .getReuseCacheOperatorsChangedStream() .subscribe(event => { const mainJointPaper = this.workflowActionService.getJointGraphWrapper().getMainJointPaper(); if (!mainJointPaper) { return; } - event.newCached.concat(event.newUnCached).forEach(opID => { + event.newReuseCacheOps.concat(event.newUnreuseCacheOps).forEach(opID => { const op = this.workflowActionService.getTexeraGraph().getOperator(opID); - this.jointUIService.changeOperatorCacheStatus(mainJointPaper, op); + this.jointUIService.changeOperatorReuseCacheStatus(mainJointPaper, op); }); }); this.workflowWebsocketService.subscribeToEvent("CacheStatusUpdateEvent").subscribe(event => { @@ -70,7 +72,7 @@ export class OperatorCacheStatusService { } Object.entries(event.cacheStatusMap).forEach(([opID, cacheStatus]) => { const op = this.workflowActionService.getTexeraGraph().getOperator(opID); - this.jointUIService.changeOperatorCacheStatus(mainJointPaper, op, cacheStatus); + this.jointUIService.changeOperatorReuseCacheStatus(mainJointPaper, op, cacheStatus); }); }); } diff --git a/core/new-gui/src/app/workspace/types/execute-workflow.interface.ts b/core/new-gui/src/app/workspace/types/execute-workflow.interface.ts index 2570ab80452..8377b75bdde 100644 --- a/core/new-gui/src/app/workspace/types/execute-workflow.interface.ts +++ b/core/new-gui/src/app/workspace/types/execute-workflow.interface.ts @@ -37,7 +37,8 @@ export interface LogicalPlan operators: LogicalOperator[]; links: LogicalLink[]; breakpoints: BreakpointInfo[]; - cachedOperatorIds: string[]; + opsToViewResult?: string[]; + opsToReuseResult?: string[]; }> {} /** diff --git a/core/new-gui/src/app/workspace/types/workflow-common.interface.ts b/core/new-gui/src/app/workspace/types/workflow-common.interface.ts index 02ab165eb25..a866dde7c1f 100644 --- a/core/new-gui/src/app/workspace/types/workflow-common.interface.ts +++ b/core/new-gui/src/app/workspace/types/workflow-common.interface.ts @@ -53,7 +53,8 @@ export interface OperatorPredicate dynamicOutputPorts?: boolean; showAdvanced: 
boolean; isDisabled?: boolean; - isCached?: boolean; + viewResult?: boolean; + markedForReuse?: boolean; customDisplayName?: string; }> {} diff --git a/core/new-gui/src/app/workspace/types/workflow-websocket.interface.ts b/core/new-gui/src/app/workspace/types/workflow-websocket.interface.ts index f41de112e38..dcf37aed381 100644 --- a/core/new-gui/src/app/workspace/types/workflow-websocket.interface.ts +++ b/core/new-gui/src/app/workspace/types/workflow-websocket.interface.ts @@ -95,8 +95,6 @@ export type ResultExportRequest = Readonly<{ operatorName: string; }>; -export type CacheStatusUpdateRequest = LogicalPlan; - export type ResultExportResponse = Readonly<{ status: "success" | "error"; message: string; @@ -112,7 +110,7 @@ export type WorkflowAvailableResultEvent = Readonly<{ availableOperators: ReadonlyArray; }>; -export type OperatorResultCacheStatus = "cache invalid" | "cache valid" | "cache not enabled"; +export type OperatorResultCacheStatus = "cache invalid" | "cache valid"; export interface CacheStatusUpdateEvent extends Readonly<{ @@ -173,7 +171,7 @@ export type WorkflowStateInfo = Readonly<{ export type TexeraWebsocketRequestTypeMap = { RegisterWIdRequest: RegisterWIdRequest; AddBreakpointRequest: BreakpointInfo; - CacheStatusUpdateRequest: CacheStatusUpdateRequest; + CacheStatusUpdateRequest: LogicalPlan; HeartBeatRequest: {}; ModifyLogicRequest: ModifyOperatorLogic; ResultExportRequest: ResultExportRequest; diff --git a/core/new-gui/src/assets/svg/operator-reuse-cache-invalid.svg b/core/new-gui/src/assets/svg/operator-reuse-cache-invalid.svg new file mode 100644 index 00000000000..bd2e51d15b0 --- /dev/null +++ b/core/new-gui/src/assets/svg/operator-reuse-cache-invalid.svg @@ -0,0 +1,3 @@ + diff --git a/core/new-gui/src/assets/svg/operator-reuse-cache-valid.svg b/core/new-gui/src/assets/svg/operator-reuse-cache-valid.svg new file mode 100644 index 00000000000..f62be44dbd5 --- /dev/null +++ b/core/new-gui/src/assets/svg/operator-reuse-cache-valid.svg @@ -0,0 +1,3 @@ + diff --git a/core/new-gui/src/assets/svg/operator-view-result.svg b/core/new-gui/src/assets/svg/operator-view-result.svg new file mode 100644 index 00000000000..14298c81dec --- /dev/null +++ b/core/new-gui/src/assets/svg/operator-view-result.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file From 59e90f9693358d407504948d9d145a24b51d0f22 Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Fri, 7 Jul 2023 07:37:35 -0700 Subject: [PATCH 09/18] wip --- .../architecture/worker/DataProcessor.scala | 3 +- .../resource/SchemaPropagationResource.scala | 6 + .../workflow/common/ProgressiveUtils.scala | 8 ++ .../metadata/OperatorMetadataGenerator.scala | 3 +- .../aggregate/FinalAggregateOpExec.scala | 119 +++++++++++++----- .../aggregate/PartialAggregateOpExec.scala | 38 ++++-- .../consolidate/ConsolidateOpDesc.scala | 34 +++++ .../consolidate/ConsolidateOpExec.scala | 38 ++++++ .../texera/workflow/common/tuple/Tuple.java | 4 +- .../ProgressiveRetractionEnforcer.scala | 41 ++++++ .../common/workflow/WorkflowCompiler.scala | 20 ++- .../SpecializedAggregateOpDesc.scala | 9 +- .../dictionary/DictionaryMatcherOpDesc.scala | 3 +- .../filter/SpecializedFilterOpDesc.java | 2 +- .../keywordSearch/KeywordSearchOpDesc.scala | 3 +- .../projection/ProjectionOpDesc.scala | 3 +- .../operators/regex/RegexOpDesc.scala | 3 +- .../sentiment/SentimentAnalysisOpDesc.scala | 3 +- .../sink/managed/ProgressiveSinkOpDesc.java | 2 +- .../apis/reddit/RedditSearchSourceOpDesc.java | 1 + .../typecasting/TypeCastingOpDesc.java | 2 +- 
.../source/PythonUDFSourceOpDescV2.java | 1 + .../operators/union/UnionOpDesc.scala | 3 +- .../unneststring/UnnestStringOpDesc.scala | 3 +- .../barChart/BarChartOpDesc.scala | 85 +++---------- .../barChart/BarChartOpExec.scala | 29 ----- .../lineChart/LineChartOpDesc.scala | 101 +++------------ .../lineChart/LineChartOpExec.scala | 24 ---- .../scatterplot/ScatterplotOpDesc.java | 2 +- .../wordCloud/WordCloudOpDesc.java | 2 +- 30 files changed, 322 insertions(+), 273 deletions(-) create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpExec.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/ProgressiveRetractionEnforcer.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/barChart/BarChartOpExec.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/lineChart/LineChartOpExec.scala diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index 616438633a0..9f4e935b6f2 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -192,7 +192,7 @@ class DataProcessor( // dependencies: } catch safely { case e => // forward input tuple to the user and pause DP thread - handleOperatorException(e) + handleOperatorException(e) } outputIterator } @@ -292,6 +292,7 @@ class DataProcessor( // dependencies: } private[this] def handleOperatorException(e: Throwable): Unit = { + e.printStackTrace() if (currentInputTuple.isLeft) { asyncRPCClient.send( LocalOperatorException(currentInputTuple.left.get, e), diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/SchemaPropagationResource.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/SchemaPropagationResource.scala index cb413a914da..95e8c53c74f 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/SchemaPropagationResource.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/SchemaPropagationResource.scala @@ -40,6 +40,12 @@ class SchemaPropagationResource { val responseContent = schemaPropagationResult.map(e => (e._1.operator, e._2.map(s => s.map(o => o.getAttributesScala))) ) + responseContent.map(kv => { + val schemaWithoutInternalAttrs = kv._2.map(portSchema => { + portSchema.map(attrs => attrs.filter(attr => attr.getName.startsWith("__internal"))) + }) + (kv._1, schemaWithoutInternalAttrs) + }) SchemaPropagationResponse(0, responseContent, null) } catch { case e: Throwable => diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/ProgressiveUtils.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/ProgressiveUtils.scala index 67a42bde311..befac8aa088 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/ProgressiveUtils.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/ProgressiveUtils.scala @@ -20,11 +20,19 @@ object ProgressiveUtils { Tuple.newBuilder(outputSchema).add(insertRetractFlagAttr, true).add(tuple).build } + def addInsertionFlag(fields: Array[Object], outputSchema: Schema): Tuple = { + 
Tuple.newBuilder(outputSchema).add(insertRetractFlagAttr, true).addSequentially(fields).build + } + def addRetractionFlag(tuple: Tuple, outputSchema: Schema): Tuple = { assert(!tuple.getSchema.containsAttribute(insertRetractFlagAttr.getName)) Tuple.newBuilder(outputSchema).add(insertRetractFlagAttr, false).add(tuple).build } + def addRetractionFlag(fields: Array[Object], outputSchema: Schema): Tuple = { + Tuple.newBuilder(outputSchema).add(insertRetractFlagAttr, false).addSequentially(fields).build + } + def isInsertion(tuple: Tuple): Boolean = { if (tuple.getSchema.containsAttribute(insertRetractFlagAttr.getName)) { tuple.getField[Boolean](insertRetractFlagAttr.getName) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/metadata/OperatorMetadataGenerator.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/metadata/OperatorMetadataGenerator.scala index c06528c2e3d..6162e5ffbd3 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/metadata/OperatorMetadataGenerator.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/metadata/OperatorMetadataGenerator.scala @@ -32,7 +32,8 @@ case class OperatorInfo( dynamicInputPorts: Boolean = false, dynamicOutputPorts: Boolean = false, supportReconfiguration: Boolean = false, - allowPortCustomization: Boolean = false + allowPortCustomization: Boolean = false, + supportRetractableInput: Boolean = false, ) case class OperatorMetadata( diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/FinalAggregateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/FinalAggregateOpExec.scala index 769a0de2b49..7f0513598f1 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/FinalAggregateOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/FinalAggregateOpExec.scala @@ -3,6 +3,7 @@ package edu.uci.ics.texera.workflow.common.operators.aggregate import edu.uci.ics.amber.engine.architecture.worker.PauseManager import edu.uci.ics.amber.engine.common.InputExhausted import edu.uci.ics.amber.engine.common.rpc.AsyncRPCClient +import edu.uci.ics.texera.workflow.common.ProgressiveUtils.{addInsertionFlag, addRetractionFlag} import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.operators.aggregate.PartialAggregateOpExec.internalAggObjKey import edu.uci.ics.texera.workflow.common.tuple.Tuple @@ -10,6 +11,9 @@ import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, OperatorSchem import scala.collection.mutable +//import scala.collection.mutable +//import scala.collection.mutable.ArrayBuffer + class FinalAggregateOpExec( val aggFuncs: List[DistributedAggregation[Object]], val groupByKeys: List[String], @@ -17,11 +21,18 @@ class FinalAggregateOpExec( ) extends OperatorExecutor { var groupByKeyAttributes: Array[Attribute] = _ - var schema: Schema = _ + var outputSchema: Schema = operatorSchemaInfo.outputSchemas(0) // each value in partialObjectsPerKey has the same length as aggFuncs // partialObjectsPerKey(key)[i] corresponds to aggFuncs[i] - var partialObjectsPerKey = new mutable.HashMap[List[Object], List[Object]]() + private var partialObjectsPerKey = Map[List[Object], List[Object]]() + + // for incremental computation + val UPDATE_INTERVAL_MS = 1000 + private var lastUpdatedTime: Long = 0 + private var counterSinceLastUpdate: Long = 0 + + private var previousAggResults 
= Map[List[Object], List[Object]]() override def open(): Unit = {} override def close(): Unit = {} @@ -37,37 +48,89 @@ class FinalAggregateOpExec( } tuple match { case Left(t) => - val key = - if (groupByKeys == null || groupByKeys.isEmpty) List() - else groupByKeys.map(k => t.getField[Object](k)) - - val partialObjects = - aggFuncs.indices.map(i => t.getField[Object](internalAggObjKey(i))).toList - if (!partialObjectsPerKey.contains(key)) { - partialObjectsPerKey.put(key, partialObjects) - } else { - val updatedPartialObjects = aggFuncs.indices - .map(i => { - val aggFunc = aggFuncs(i) - val partial1 = partialObjectsPerKey(key)(i) - val partial2 = partialObjects(i) - aggFunc.merge(partial1, partial2) - }) - .toList - partialObjectsPerKey.put(key, updatedPartialObjects) - } - Iterator() + counterSinceLastUpdate += 1 + insertPartialInput(t) + + val condition: Boolean = System.currentTimeMillis - lastUpdatedTime > UPDATE_INTERVAL_MS + if (condition) + outputDiff() + else + Iterator() + case Right(_) => - partialObjectsPerKey.iterator.map(pair => { - val finalAggValues = aggFuncs.indices.map(i => aggFuncs(i).finalAgg(pair._2(i))) + outputDiff() +// partialObjectsPerKey.iterator.map(pair => { +// val finalAggValues = aggFuncs.indices.map(i => aggFuncs(i).finalAgg(pair._2(i))) +// +// val tupleBuilder = Tuple.newBuilder(operatorSchemaInfo.outputSchemas(0)) +// // add group by keys and final agg values +// tupleBuilder.addSequentially((pair._1 ++ finalAggValues).toArray) +// +// tupleBuilder.build() +// }) + } + } + + private def outputDiff(): Iterator[Tuple] = { + val resultIterator = calculateDiff() + + counterSinceLastUpdate = 0 + lastUpdatedTime = System.currentTimeMillis + previousAggResults = partialObjectsPerKey + resultIterator + } + + private def calculateDiff(): Iterator[Tuple] = { + // find differences + + val retractions = new mutable.ArrayBuffer[Tuple]() + val insertions = new mutable.ArrayBuffer[Tuple]() - val tupleBuilder = Tuple.newBuilder(operatorSchemaInfo.outputSchemas(0)) - // add group by keys and final agg values - tupleBuilder.addSequentially((pair._1 ++ finalAggValues).toArray) + partialObjectsPerKey.keySet.foreach(k => { + if (! 
previousAggResults.contains(k)) { + val newFields = finalAggregate(k, partialObjectsPerKey(k)) + insertions.append(addInsertionFlag(newFields, outputSchema)) + } else if (previousAggResults(k) != partialObjectsPerKey(k)) { + val prevFields = finalAggregate(k, previousAggResults(k)) + retractions.append(addRetractionFlag(prevFields, outputSchema)) + val newFields = finalAggregate(k, partialObjectsPerKey(k)) + insertions.append(addInsertionFlag(newFields, outputSchema)) + } + }) - tupleBuilder.build() + val results = retractions ++ insertions + results.foreach(r => { + System.err.println("aggResult: " + r.getFields) + }) + + results.iterator + } + + private def insertPartialInput(t: Tuple): Unit = { + val key = + if (groupByKeys == null || groupByKeys.isEmpty) List() + else groupByKeys.map(k => t.getField[Object](k)) + + val partialObjects = + aggFuncs.indices.map(i => t.getField[Object](internalAggObjKey(i))).toList + if (!partialObjectsPerKey.contains(key)) { + partialObjectsPerKey += (key -> partialObjects) + } else { + val updatedPartialObjects = aggFuncs.indices + .map(i => { + val aggFunc = aggFuncs(i) + val partial1 = partialObjectsPerKey(key)(i) + val partial2 = partialObjects(i) + aggFunc.merge(partial1, partial2) }) + .toList + partialObjectsPerKey += (key -> updatedPartialObjects) } } + private def finalAggregate(key: List[Object], value: List[Object]): Array[Object] = { + val finalAggValues = aggFuncs.indices.map(i => aggFuncs(i).finalAgg(value(i))) + (key ++ finalAggValues).toArray + } + } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/PartialAggregateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/PartialAggregateOpExec.scala index d7dfbff2f47..2b4c5545f45 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/PartialAggregateOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/PartialAggregateOpExec.scala @@ -6,14 +6,10 @@ import edu.uci.ics.amber.engine.common.rpc.AsyncRPCClient import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.operators.aggregate.PartialAggregateOpExec.internalAggObjKey import edu.uci.ics.texera.workflow.common.tuple.Tuple -import edu.uci.ics.texera.workflow.common.tuple.schema.{ - Attribute, - AttributeType, - OperatorSchemaInfo, - Schema -} +import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, AttributeType, OperatorSchemaInfo, Schema} -import scala.collection.mutable +import java.util.Collections +import scala.collection.{JavaConverters, mutable} import scala.jdk.CollectionConverters.asJavaIterableConverter object PartialAggregateOpExec { @@ -39,6 +35,10 @@ class PartialAggregateOpExec( var partialObjectsPerKey = new mutable.HashMap[List[Object], List[Object]]() + // for incremental computation + val UPDATE_INTERVAL_MS = 500 + private var lastUpdatedTime: Long = 0 + override def open(): Unit = {} override def close(): Unit = {} @@ -67,13 +67,27 @@ class PartialAggregateOpExec( aggFunc.iterate(partial, t) } partialObjectsPerKey.put(key, updatedPartialObjects) - Iterator() + + val condition = System.currentTimeMillis - lastUpdatedTime > UPDATE_INTERVAL_MS + if (condition) { + lastUpdatedTime = System.currentTimeMillis + val resultIterator = getPartialOutputs() + this.partialObjectsPerKey = new mutable.HashMap[List[Object], List[Object]]() + resultIterator + } + else Iterator() case Right(_) => - 
partialObjectsPerKey.iterator.map(pair => { - val tupleFields = pair._1 ++ pair._2 - Tuple.newBuilder(schema).addSequentially(tupleFields.toArray).build() - }) + val resultIterator = getPartialOutputs() + this.partialObjectsPerKey = new mutable.HashMap[List[Object], List[Object]]() + resultIterator } } + private def getPartialOutputs(): scala.Iterator[Tuple] = { + partialObjectsPerKey.iterator.map(pair => { + val tupleFields = pair._1 ++ pair._2 + Tuple.newBuilder(schema).addSequentially(tupleFields.toArray).build() + }) + } + } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpDesc.scala new file mode 100644 index 00000000000..6ea5e86d0d7 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpDesc.scala @@ -0,0 +1,34 @@ +package edu.uci.ics.texera.workflow.common.operators.consolidate + +import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecConfig +import edu.uci.ics.texera.workflow.common.ProgressiveUtils +import edu.uci.ics.texera.workflow.common.metadata.{InputPort, OperatorGroupConstants, OperatorInfo, OutputPort} +import edu.uci.ics.texera.workflow.common.operators.OperatorDescriptor +import edu.uci.ics.texera.workflow.common.tuple.schema.{OperatorSchemaInfo, Schema} +import edu.uci.ics.texera.workflow.operators.difference.DifferenceOpExec + +import scala.collection.JavaConverters.asScalaBuffer +import scala.collection.immutable.List + +class ConsolidateOpDesc extends OperatorDescriptor{ + override def operatorInfo: OperatorInfo = { + OperatorInfo( + "Consolidate", + "Consolidate retractable inputs, collect all of them and output append-only data", + OperatorGroupConstants.UTILITY_GROUP, + List(InputPort("")), + List(OutputPort("")), + supportRetractableInput = true, + ) + } + + override def getOutputSchema(schemas: Array[Schema]): Schema = { + val newAttrs = asScalaBuffer(schemas(0).getAttributes) + .filter(attr => attr == ProgressiveUtils.insertRetractFlagAttr) + Schema.newBuilder().add(newAttrs.toArray :_*).build() + } + + override def operatorExecutor(operatorSchemaInfo: OperatorSchemaInfo): OpExecConfig = { + OpExecConfig.manyToOneLayer(operatorIdentifier, _ => new ConsolidateOpExec(operatorSchemaInfo)) + } +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpExec.scala new file mode 100644 index 00000000000..424f6638fdf --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpExec.scala @@ -0,0 +1,38 @@ +package edu.uci.ics.texera.workflow.common.operators.consolidate + +import edu.uci.ics.amber.engine.architecture.worker.PauseManager +import edu.uci.ics.amber.engine.common.InputExhausted +import edu.uci.ics.amber.engine.common.rpc.AsyncRPCClient +import edu.uci.ics.texera.workflow.common.ProgressiveUtils +import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor +import edu.uci.ics.texera.workflow.common.tuple.Tuple +import edu.uci.ics.texera.workflow.common.tuple.schema.OperatorSchemaInfo + +import scala.collection.mutable.ArrayBuffer + +class ConsolidateOpExec(operatorSchemaInfo: OperatorSchemaInfo) extends OperatorExecutor { + + private val results = new ArrayBuffer[Tuple]() + + override def 
processTexeraTuple(tuple: Either[Tuple, InputExhausted], input: Int, pauseManager: PauseManager, asyncRPCClient: AsyncRPCClient): Iterator[Tuple] = { + + tuple match { + case Left(t) => + val (isInsertion, tupleValue) = + ProgressiveUtils.getTupleFlagAndValue(t, operatorSchemaInfo) + if (isInsertion) { + results += tupleValue + } else { + results -= tupleValue + } + Iterator() + case Right(_) => + results.iterator + } + + } + + override def open(): Unit = {} + + override def close(): Unit = {} +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/tuple/Tuple.java b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/tuple/Tuple.java index 79e611b9e25..c22563c74b2 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/tuple/Tuple.java +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/tuple/Tuple.java @@ -266,9 +266,9 @@ public BuilderV2 add(String attributeName, AttributeType attributeType, Object f */ public BuilderV2 addSequentially(Object[] fields) { checkNotNull(fields); - checkSchemaMatchesFields(schema.getAttributes(), Lists.newArrayList(fields)); + int startIndex = this.fieldNameMap.size(); for (int i = 0; i < fields.length; i++) { - this.add(schema.getAttributes().get(i), fields[i]); + this.add(schema.getAttributes().get(startIndex + i), fields[i]); } return this; } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/ProgressiveRetractionEnforcer.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/ProgressiveRetractionEnforcer.scala new file mode 100644 index 00000000000..5c61e51bad5 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/ProgressiveRetractionEnforcer.scala @@ -0,0 +1,41 @@ +package edu.uci.ics.texera.workflow.common.workflow + +import edu.uci.ics.texera.workflow.common.ProgressiveUtils +import edu.uci.ics.texera.workflow.common.operators.consolidate.ConsolidateOpDesc + +import scala.collection.mutable.ArrayBuffer + +object ProgressiveRetractionEnforcer { + + def enforceDelta(logicalPlan: LogicalPlan): LogicalPlan = { + // first find the edges that we need to add the consolidate operator + val edgesToAddConsolidateOp = new ArrayBuffer[OperatorLink]() + logicalPlan.outputSchemaMap.foreach(kv => { + val op = kv._1 + val outSchemas = kv._2 + logicalPlan.getDownstreamEdges(op.operator).zip(outSchemas).foreach(out => { + val outEdge = out._1 + val outSchema = out._2 + if (outSchema.containsAttribute(ProgressiveUtils.insertRetractFlagAttr.getName)) { + val downstreamOp = logicalPlan.getOperator(outEdge.destination.operatorID) + if (! 
downstreamOp.operatorInfo.supportRetractableInput) { + edgesToAddConsolidateOp.append(outEdge) + } + } + }) + }) + + var resultPlan = logicalPlan + edgesToAddConsolidateOp.foreach(edge => { + val newOp = new ConsolidateOpDesc() + resultPlan = resultPlan.removeEdge(edge) + resultPlan = resultPlan.addOperator(newOp) + .addEdge(edge.origin.operatorID, newOp.operatorID, edge.origin.portOrdinal, 0) + .addEdge(newOp.operatorID, edge.destination.operatorID, 0, edge.destination.portOrdinal) + }) + + resultPlan + } + + +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala index 76b0b0281cf..f889ad956f2 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala @@ -6,10 +6,13 @@ import edu.uci.ics.amber.engine.common.virtualidentity.WorkflowIdentity import edu.uci.ics.texera.web.service.WorkflowCacheChecker import edu.uci.ics.texera.workflow.common.operators.OperatorDescriptor import edu.uci.ics.texera.workflow.common.storage.OpResultStorage -import edu.uci.ics.texera.workflow.common.{ConstraintViolation, WorkflowContext} +import edu.uci.ics.texera.workflow.common.{ConstraintViolation, ProgressiveUtils, WorkflowContext} import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc import edu.uci.ics.texera.workflow.operators.visualization.VisualizationConstants +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer + object WorkflowCompiler { def isSink(operatorID: String, workflowCompiler: WorkflowCompiler): Boolean = { @@ -70,16 +73,23 @@ class WorkflowCompiler(val logicalPlan: LogicalPlan, val context: WorkflowContex opResultStorage: OpResultStorage, lastCompletedJob: LogicalPlan = null ): Workflow = { + + // do rewriting to use previous cache results val cacheReuses = new WorkflowCacheChecker(lastCompletedJob, logicalPlan).getValidCacheReuse() val opsToReuseCache = cacheReuses.intersect(logicalPlan.opsToReuseCache.toSet) - val rewrittenLogicalPlan = + val logicalPlan1 = WorkflowCacheRewriter.transform(logicalPlan, opResultStorage, opsToReuseCache) - rewrittenLogicalPlan.operatorMap.values.foreach(initOperator) + logicalPlan1.operatorMap.values.foreach(initOperator) assignSinkStorage(logicalPlan, opResultStorage, opsToReuseCache) - assignSinkStorage(rewrittenLogicalPlan, opResultStorage, opsToReuseCache) + assignSinkStorage(logicalPlan1, opResultStorage, opsToReuseCache) + + // add necessary consolidate operator if an operator can't handle retractable inputs + val logicalPlan2 = ProgressiveRetractionEnforcer.enforceDelta(logicalPlan1) + - val physicalPlan0 = rewrittenLogicalPlan.toPhysicalPlan(this.context, opResultStorage) + // convert to physical plan + val physicalPlan0 = logicalPlan2.toPhysicalPlan(this.context, opResultStorage) // create pipelined regions. 
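For intuition, the consolidation step spliced in by ProgressiveRetractionEnforcer folds a retractable delta stream back into append-only results before it reaches an operator that cannot handle retractions. A minimal standalone sketch of that convention follows; the Delta case class and consolidate helper are illustrative names only, not the actual ConsolidateOpExec, which works on Texera Tuples carrying the insert/retract flag from ProgressiveUtils.

// Illustrative only: fold a stream of insert/retract deltas into an append-only snapshot.
final case class Delta[T](isInsertion: Boolean, value: T)

def consolidate[T](deltas: Iterator[Delta[T]]): List[T] = {
  val buffer = scala.collection.mutable.ArrayBuffer.empty[T]
  deltas.foreach { d =>
    if (d.isInsertion) buffer += d.value
    else buffer -= d.value // a retraction removes one earlier occurrence of the same value
  }
  buffer.toList
}

// Example: an aggregate that first reported count=3, then revised it to count=5.
// consolidate(Iterator(Delta(true, "count=3"), Delta(false, "count=3"), Delta(true, "count=5")))
// returns List("count=5"), which is what a non-retractable downstream operator would receive.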
val physicalPlan1 = new WorkflowPipelinedRegionsBuilder( diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/aggregate/SpecializedAggregateOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/aggregate/SpecializedAggregateOpDesc.scala index 20a67601ce6..2d0dd461dca 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/aggregate/SpecializedAggregateOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/aggregate/SpecializedAggregateOpDesc.scala @@ -2,13 +2,9 @@ package edu.uci.ics.texera.workflow.operators.aggregate import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle +import edu.uci.ics.texera.workflow.common.ProgressiveUtils import edu.uci.ics.texera.workflow.common.metadata.annotations.AutofillAttributeNameList -import edu.uci.ics.texera.workflow.common.metadata.{ - InputPort, - OperatorGroupConstants, - OperatorInfo, - OutputPort -} +import edu.uci.ics.texera.workflow.common.metadata.{InputPort, OperatorGroupConstants, OperatorInfo, OutputPort} import edu.uci.ics.texera.workflow.common.operators.aggregate.AggregateOpDesc import edu.uci.ics.texera.workflow.common.tuple.schema.{OperatorSchemaInfo, Schema} import edu.uci.ics.texera.workflow.common.workflow.PhysicalPlan @@ -71,6 +67,7 @@ class SpecializedAggregateOpDesc extends AggregateOpDesc { } Schema .newBuilder() + .add(ProgressiveUtils.insertRetractFlagAttr) .add(getGroupByKeysSchema(schemas).getAttributes) .add(aggregations.map(agg => agg.getAggregationAttribute(schemas(0))).asJava) .build() diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/dictionary/DictionaryMatcherOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/dictionary/DictionaryMatcherOpDesc.scala index b6938f03d4a..62e7da505b0 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/dictionary/DictionaryMatcherOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/dictionary/DictionaryMatcherOpDesc.scala @@ -44,7 +44,8 @@ class DictionaryMatcherOpDesc extends MapOpDesc { OperatorGroupConstants.SEARCH_GROUP, inputPorts = List(InputPort()), outputPorts = List(OutputPort()), - supportReconfiguration = true + supportReconfiguration = true, + supportRetractableInput = true, ) override def getOutputSchema(schemas: Array[Schema]): Schema = { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/filter/SpecializedFilterOpDesc.java b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/filter/SpecializedFilterOpDesc.java index 1ff1fa7daca..6dcf9a63314 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/filter/SpecializedFilterOpDesc.java +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/filter/SpecializedFilterOpDesc.java @@ -39,6 +39,6 @@ public OperatorInfo operatorInfo() { OperatorGroupConstants.SEARCH_GROUP(), asScalaBuffer(singletonList(new InputPort("", false))).toList(), asScalaBuffer(singletonList(new OutputPort(""))).toList(), - false, false, true, false); + false, false, true, false, true); } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/keywordSearch/KeywordSearchOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/keywordSearch/KeywordSearchOpDesc.scala index 618648e8dbb..2c6db894280 100644 --- 
a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/keywordSearch/KeywordSearchOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/keywordSearch/KeywordSearchOpDesc.scala @@ -37,6 +37,7 @@ class KeywordSearchOpDesc extends FilterOpDesc { operatorGroupName = OperatorGroupConstants.SEARCH_GROUP, inputPorts = List(InputPort()), outputPorts = List(OutputPort()), - supportReconfiguration = true + supportReconfiguration = true, + supportRetractableInput = true, ) } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/projection/ProjectionOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/projection/ProjectionOpDesc.scala index fd43c9c4761..04d2328659f 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/projection/ProjectionOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/projection/ProjectionOpDesc.scala @@ -55,7 +55,8 @@ class ProjectionOpDesc extends MapOpDesc { OperatorGroupConstants.UTILITY_GROUP, inputPorts = List(InputPort()), outputPorts = List(OutputPort()), - supportReconfiguration = false + supportReconfiguration = false, + supportRetractableInput = true, ) } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/regex/RegexOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/regex/RegexOpDesc.scala index 2538747258d..e4addb55ddd 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/regex/RegexOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/regex/RegexOpDesc.scala @@ -40,6 +40,7 @@ class RegexOpDesc extends FilterOpDesc { operatorGroupName = OperatorGroupConstants.SEARCH_GROUP, inputPorts = List(InputPort()), outputPorts = List(OutputPort()), - supportReconfiguration = true + supportReconfiguration = true, + supportRetractableInput = true, ) } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sentiment/SentimentAnalysisOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sentiment/SentimentAnalysisOpDesc.scala index f9906c4740b..12029c2fd4f 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sentiment/SentimentAnalysisOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sentiment/SentimentAnalysisOpDesc.scala @@ -53,7 +53,8 @@ class SentimentAnalysisOpDesc extends MapOpDesc { OperatorGroupConstants.ANALYTICS_GROUP, List(InputPort("")), List(OutputPort("")), - supportReconfiguration = true + supportReconfiguration = true, + supportRetractableInput = true, ) override def getOutputSchema(schemas: Array[Schema]): Schema = { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sink/managed/ProgressiveSinkOpDesc.java b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sink/managed/ProgressiveSinkOpDesc.java index 8f52cf6f70b..4845aea20be 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sink/managed/ProgressiveSinkOpDesc.java +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sink/managed/ProgressiveSinkOpDesc.java @@ -58,7 +58,7 @@ public OperatorInfo operatorInfo() { "View the edu.uci.ics.texera.workflow results", OperatorGroupConstants.UTILITY_GROUP(), asScalaBuffer(singletonList(new InputPort("", false))).toList(), - List.empty(), false, false, false, false); + List.empty(), false, false, false, false, true); } @Override diff --git 
a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/apis/reddit/RedditSearchSourceOpDesc.java b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/apis/reddit/RedditSearchSourceOpDesc.java index da1d2b9fb1a..d6712455859 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/apis/reddit/RedditSearchSourceOpDesc.java +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/source/apis/reddit/RedditSearchSourceOpDesc.java @@ -130,6 +130,7 @@ public OperatorInfo operatorInfo() { false, false, false, + false, false ); } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/typecasting/TypeCastingOpDesc.java b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/typecasting/TypeCastingOpDesc.java index a78e40252de..432691699cc 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/typecasting/TypeCastingOpDesc.java +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/typecasting/TypeCastingOpDesc.java @@ -45,7 +45,7 @@ public OperatorInfo operatorInfo() { OperatorGroupConstants.UTILITY_GROUP(), asScalaBuffer(singletonList(new InputPort("", false))).toList(), asScalaBuffer(singletonList(new OutputPort(""))).toList(), - false, false, false, false); + false, false, false, false, true); } @Override diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/python/source/PythonUDFSourceOpDescV2.java b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/python/source/PythonUDFSourceOpDescV2.java index c0249a8b91a..f8bb7552ba8 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/python/source/PythonUDFSourceOpDescV2.java +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/python/source/PythonUDFSourceOpDescV2.java @@ -73,6 +73,7 @@ public OperatorInfo operatorInfo() { false, false, true, + false, false ); } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/union/UnionOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/union/UnionOpDesc.scala index 043f1231f0e..8d12bac88f2 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/union/UnionOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/union/UnionOpDesc.scala @@ -23,7 +23,8 @@ class UnionOpDesc extends OperatorDescriptor { "Unions the output rows from multiple input operators", OperatorGroupConstants.UTILITY_GROUP, inputPorts = List(InputPort(allowMultiInputs = true)), - outputPorts = List(OutputPort()) + outputPorts = List(OutputPort()), + supportRetractableInput = true, ) override def getOutputSchema(schemas: Array[Schema]): Schema = { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/unneststring/UnnestStringOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/unneststring/UnnestStringOpDesc.scala index 0cc3f58d7d0..94278c0e6e8 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/unneststring/UnnestStringOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/unneststring/UnnestStringOpDesc.scala @@ -34,7 +34,8 @@ class UnnestStringOpDesc extends FlatMapOpDesc { "Unnest the string values in the column separated by a delimiter to multiple values", operatorGroupName = OperatorGroupConstants.UTILITY_GROUP, inputPorts = List(InputPort()), - outputPorts = List(OutputPort()) + outputPorts = 
List(OutputPort()), + supportRetractableInput = true, ) override def operatorExecutor(operatorSchemaInfo: OperatorSchemaInfo) = { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/barChart/BarChartOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/barChart/BarChartOpDesc.scala index 57e3c91a464..0af3fe6b017 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/barChart/BarChartOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/barChart/BarChartOpDesc.scala @@ -1,33 +1,12 @@ package edu.uci.ics.texera.workflow.operators.visualization.barChart -import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} +import com.fasterxml.jackson.annotation.{JsonIgnore, JsonProperty, JsonPropertyDescription} import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecConfig -import edu.uci.ics.amber.engine.common.virtualidentity.util.makeLayer -import edu.uci.ics.texera.workflow.common.metadata.{ - InputPort, - OperatorGroupConstants, - OperatorInfo, - OutputPort -} -import edu.uci.ics.texera.workflow.common.metadata.annotations.{ - AutofillAttributeName, - AutofillAttributeNameList -} -import edu.uci.ics.texera.workflow.common.tuple.schema.{ - Attribute, - AttributeType, - OperatorSchemaInfo, - Schema -} -import edu.uci.ics.texera.workflow.operators.aggregate.{ - AggregationFunction, - AggregationOperation, - SpecializedAggregateOpDesc -} -import edu.uci.ics.texera.workflow.operators.visualization.{ - VisualizationConstants, - VisualizationOperator -} +import edu.uci.ics.texera.workflow.common.metadata.annotations.{AutofillAttributeName, AutofillAttributeNameList} +import edu.uci.ics.texera.workflow.common.metadata.{InputPort, OperatorGroupConstants, OperatorInfo, OutputPort} +import edu.uci.ics.texera.workflow.common.tuple.schema.{OperatorSchemaInfo, Schema} +import edu.uci.ics.texera.workflow.operators.aggregate.{AggregationFunction, AggregationOperation, SpecializedAggregateOpDesc} +import edu.uci.ics.texera.workflow.operators.visualization.{VisualizationConstants, VisualizationOperator} import java.util.Collections.singletonList import scala.jdk.CollectionConverters.asScalaBuffer @@ -52,11 +31,8 @@ class BarChartOpDesc extends VisualizationOperator { def resultAttributeNames: List[String] = if (noDataCol) List("count") else dataColumns - override def operatorExecutorMultiLayer(operatorSchemaInfo: OperatorSchemaInfo) = { - if (nameColumn == null || nameColumn == "") { - throw new RuntimeException("bar chart: name column is null or empty") - } - + @JsonIgnore + lazy val aggOperator: SpecializedAggregateOpDesc = { val aggOperator = new SpecializedAggregateOpDesc() aggOperator.context = this.context aggOperator.operatorID = this.operatorID @@ -78,21 +54,20 @@ class BarChartOpDesc extends VisualizationOperator { aggOperator.aggregations = aggOperations aggOperator.groupByKeys = List(nameColumn) } + aggOperator + } + + override def operatorExecutorMultiLayer(operatorSchemaInfo: OperatorSchemaInfo) = { + if (nameColumn == null || nameColumn == "") { + throw new RuntimeException("bar chart: name column is null or empty") + } - val aggPlan = aggOperator.aggregateOperatorExecutor( + aggOperator.aggregateOperatorExecutor( OperatorSchemaInfo( operatorSchemaInfo.inputSchemas, Array(aggOperator.getOutputSchema(operatorSchemaInfo.inputSchemas)) ) ) - - val barChartViz = OpExecConfig.oneToOneLayer( - 
makeLayer(operatorIdentifier, "visualize"), - _ => new BarChartOpExec(this, operatorSchemaInfo) - ) - - val finalAggOp = aggPlan.sinkOperators.head - aggPlan.addOperator(barChartViz).addEdge(finalAggOp, barChartViz.id) } override def operatorInfo: OperatorInfo = @@ -105,33 +80,7 @@ class BarChartOpDesc extends VisualizationOperator { ) override def getOutputSchema(schemas: Array[Schema]): Schema = { - Schema - .newBuilder() - .add(getGroupByKeysSchema(schemas).getAttributes) - .add(getFinalAggValueSchema.getAttributes) - .build() - } - - private def getGroupByKeysSchema(schemas: Array[Schema]): Schema = { - val groupByKeys = List(this.nameColumn) - Schema - .newBuilder() - .add(groupByKeys.map(key => schemas(0).getAttribute(key)).toArray: _*) - .build() - } - - private def getFinalAggValueSchema: Schema = { - if (noDataCol) { - Schema - .newBuilder() - .add(resultAttributeNames.head, AttributeType.INTEGER) - .build() - } else { - Schema - .newBuilder() - .add(resultAttributeNames.map(key => new Attribute(key, AttributeType.DOUBLE)).toArray: _*) - .build() - } + aggOperator.getOutputSchema(schemas) } override def operatorExecutor(operatorSchemaInfo: OperatorSchemaInfo): OpExecConfig = { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/barChart/BarChartOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/barChart/BarChartOpExec.scala deleted file mode 100644 index 50cadf213c2..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/barChart/BarChartOpExec.scala +++ /dev/null @@ -1,29 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.visualization.barChart - -import edu.uci.ics.texera.workflow.common.operators.map.MapOpExec -import edu.uci.ics.texera.workflow.common.tuple.Tuple -import edu.uci.ics.texera.workflow.common.tuple.schema.OperatorSchemaInfo - -/** - * Simply mocks the data. - * @param opDesc BarChartOpDesc. - * @param operatorSchemaInfo The descriptor's schema info. 
- */ -class BarChartOpExec( - opDesc: BarChartOpDesc, - operatorSchemaInfo: OperatorSchemaInfo -) extends MapOpExec { - - setMapFunc(this.processTuple) - - def processTuple(t: Tuple): Tuple = { - val builder = Tuple.newBuilder(operatorSchemaInfo.outputSchemas(0)) - val inputSchema = t.getSchema - builder.add(inputSchema.getAttribute(opDesc.nameColumn), t.getField(opDesc.nameColumn)) - for (i <- opDesc.resultAttributeNames.indices) { - val dataName = opDesc.resultAttributeNames.apply(i) - builder.add(inputSchema.getAttribute(dataName), t.getField(dataName)) - } - builder.build - } -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/lineChart/LineChartOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/lineChart/LineChartOpDesc.scala index 4b15e44a53d..0ec9732351e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/lineChart/LineChartOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/lineChart/LineChartOpDesc.scala @@ -2,32 +2,11 @@ package edu.uci.ics.texera.workflow.operators.visualization.lineChart import com.fasterxml.jackson.annotation.{JsonIgnore, JsonProperty, JsonPropertyDescription} import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecConfig -import edu.uci.ics.amber.engine.common.virtualidentity.util.makeLayer -import edu.uci.ics.texera.workflow.common.metadata.annotations.{ - AutofillAttributeName, - AutofillAttributeNameList -} -import edu.uci.ics.texera.workflow.common.metadata.{ - InputPort, - OperatorGroupConstants, - OperatorInfo, - OutputPort -} -import edu.uci.ics.texera.workflow.common.tuple.schema.{ - Attribute, - AttributeType, - OperatorSchemaInfo, - Schema -} -import edu.uci.ics.texera.workflow.operators.aggregate.{ - AggregationFunction, - AggregationOperation, - SpecializedAggregateOpDesc -} -import edu.uci.ics.texera.workflow.operators.visualization.{ - VisualizationConstants, - VisualizationOperator -} +import edu.uci.ics.texera.workflow.common.metadata.annotations.{AutofillAttributeName, AutofillAttributeNameList} +import edu.uci.ics.texera.workflow.common.metadata.{InputPort, OperatorGroupConstants, OperatorInfo, OutputPort} +import edu.uci.ics.texera.workflow.common.tuple.schema.{OperatorSchemaInfo, Schema} +import edu.uci.ics.texera.workflow.operators.aggregate.{AggregationFunction, AggregationOperation, SpecializedAggregateOpDesc} +import edu.uci.ics.texera.workflow.operators.visualization.{VisualizationConstants, VisualizationOperator} import java.util.Collections.singletonList import scala.jdk.CollectionConverters.asScalaBuffer @@ -44,22 +23,14 @@ class LineChartOpDesc extends VisualizationOperator { @JsonProperty(value = "chart style", required = true, defaultValue = VisualizationConstants.LINE) var lineChartEnum: LineChartEnum = _ - @JsonIgnore - private var groupBySchema: Schema = _ - @JsonIgnore - private var finalAggValueSchema: Schema = _ - override def chartType: String = lineChartEnum.getChartStyle def noDataCol: Boolean = dataColumns == null || dataColumns.isEmpty def resultAttributeNames: List[String] = if (noDataCol) List("count") else dataColumns - override def operatorExecutorMultiLayer(operatorSchemaInfo: OperatorSchemaInfo) = { - if (nameColumn == null || nameColumn == "") { - throw new RuntimeException("line chart: name column is null or empty") - } - + @JsonIgnore + lazy val aggOperator: SpecializedAggregateOpDesc = { val aggOperator = new 
SpecializedAggregateOpDesc() aggOperator.context = this.context aggOperator.operatorID = this.operatorID @@ -81,21 +52,20 @@ class LineChartOpDesc extends VisualizationOperator { aggOperator.aggregations = aggOperations aggOperator.groupByKeys = List(nameColumn) } + aggOperator + } - val aggPlan = aggOperator.aggregateOperatorExecutor( + override def operatorExecutorMultiLayer(operatorSchemaInfo: OperatorSchemaInfo) = { + if (nameColumn == null || nameColumn == "") { + throw new RuntimeException("line chart: name column is null or empty") + } + + aggOperator.aggregateOperatorExecutor( OperatorSchemaInfo( operatorSchemaInfo.inputSchemas, Array(aggOperator.getOutputSchema(operatorSchemaInfo.inputSchemas)) ) ) - - val lineChartOpExec = OpExecConfig.oneToOneLayer( - makeLayer(operatorIdentifier, "visualize"), - _ => new LineChartOpExec(this, operatorSchemaInfo) - ) - - val finalAggOp = aggPlan.sinkOperators.head - aggPlan.addOperator(lineChartOpExec).addEdge(finalAggOp, lineChartOpExec.id) } override def operatorInfo: OperatorInfo = @@ -108,46 +78,7 @@ class LineChartOpDesc extends VisualizationOperator { ) override def getOutputSchema(schemas: Array[Schema]): Schema = { - Schema - .newBuilder() - .add(getGroupByKeysSchema(schemas).getAttributes) - .add(getFinalAggValueSchema.getAttributes) - .build() - } - - private def getGroupByKeysSchema(schemas: Array[Schema]): Schema = { - val groupByKeys = List(this.nameColumn) - Schema - .newBuilder() - .add(groupByKeys.map(key => schemas(0).getAttribute(key)).toArray: _*) - .build() - } - - private def getFinalAggValueSchema: Schema = { - if (noDataCol) { - Schema - .newBuilder() - .add(resultAttributeNames.head, AttributeType.INTEGER) - .build() - } else { - Schema - .newBuilder() - .add(resultAttributeNames.map(key => new Attribute(key, AttributeType.DOUBLE)).toArray: _*) - .build() - } - } - - def groupByFunc(): Schema => Schema = { schema => - { - // Since this is a partially evaluated tuple, there is no actual schema for this - // available anywhere. 
Constructing it once for re-use - if (groupBySchema == null) { - val schemaBuilder = Schema.newBuilder() - schemaBuilder.add(schema.getAttribute(nameColumn)) - groupBySchema = schemaBuilder.build - } - groupBySchema - } + aggOperator.getOutputSchema(schemas) } override def operatorExecutor(operatorSchemaInfo: OperatorSchemaInfo): OpExecConfig = { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/lineChart/LineChartOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/lineChart/LineChartOpExec.scala deleted file mode 100644 index 6af7ecc9fc1..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/lineChart/LineChartOpExec.scala +++ /dev/null @@ -1,24 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.visualization.lineChart - -import edu.uci.ics.texera.workflow.common.operators.map.MapOpExec -import edu.uci.ics.texera.workflow.common.tuple.Tuple -import edu.uci.ics.texera.workflow.common.tuple.schema.OperatorSchemaInfo - -class LineChartOpExec( - opDesc: LineChartOpDesc, - operatorSchemaInfo: OperatorSchemaInfo -) extends MapOpExec { - - setMapFunc(this.processTuple) - - def processTuple(t: Tuple): Tuple = { - val builder = Tuple.newBuilder(operatorSchemaInfo.outputSchemas(0)) - val inputSchema = t.getSchema - builder.add(inputSchema.getAttribute(opDesc.nameColumn), t.getField(opDesc.nameColumn)) - for (i <- opDesc.resultAttributeNames.indices) { - val dataName = opDesc.resultAttributeNames.apply(i) - builder.add(inputSchema.getAttribute(dataName), t.getField(dataName)) - } - builder.build - } -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/scatterplot/ScatterplotOpDesc.java b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/scatterplot/ScatterplotOpDesc.java index 07d91675f8d..dd08072fca1 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/scatterplot/ScatterplotOpDesc.java +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/scatterplot/ScatterplotOpDesc.java @@ -99,7 +99,7 @@ public OperatorInfo operatorInfo() { OperatorGroupConstants.VISUALIZATION_GROUP(), asScalaBuffer(singletonList(new InputPort("", false))).toList(), asScalaBuffer(singletonList(new OutputPort(""))).toList(), - false, false, false, false); + false, false, false, false, false); } @Override diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/wordCloud/WordCloudOpDesc.java b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/wordCloud/WordCloudOpDesc.java index cfa987a77d9..335ee29231c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/wordCloud/WordCloudOpDesc.java +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/wordCloud/WordCloudOpDesc.java @@ -97,7 +97,7 @@ public OperatorInfo operatorInfo() { OperatorGroupConstants.VISUALIZATION_GROUP(), asScalaBuffer(singletonList(new InputPort("", false))).toList(), asScalaBuffer(singletonList(new OutputPort(""))).toList(), - false, false, false, false); + false, false, false, false, false); } @Override From b4c81163ad0bd00e8fb27645223fb0387d4c295e Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Fri, 7 Jul 2023 11:05:45 -0700 Subject: [PATCH 10/18] wip --- .../common/operators/OperatorDescriptor.scala | 19 +-- .../consolidate/ConsolidateOpDesc.scala | 2 +- 
.../ProgressiveRetractionEnforcer.scala | 5 +- .../common/workflow/WorkflowCompiler.scala | 2 +- .../hashJoin/IncrementalJoinOpDesc.scala | 83 ++++++++++++ .../hashJoin/IncrementalJoinOpExec.scala | 123 ++++++++++++++++++ 6 files changed, 216 insertions(+), 18 deletions(-) create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpExec.scala diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala index cda45a23ce7..b6a7929c5f5 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala @@ -17,7 +17,7 @@ import edu.uci.ics.texera.workflow.operators.difference.DifferenceOpDesc import edu.uci.ics.texera.workflow.operators.distinct.DistinctOpDesc import edu.uci.ics.texera.workflow.operators.download.BulkDownloaderOpDesc import edu.uci.ics.texera.workflow.operators.filter.SpecializedFilterOpDesc -import edu.uci.ics.texera.workflow.operators.hashJoin.HashJoinOpDesc +import edu.uci.ics.texera.workflow.operators.hashJoin.{HashJoinOpDesc, IncrementalJoinOpDesc, IncrementalJoinOpExec} import edu.uci.ics.texera.workflow.operators.intersect.IntersectOpDesc import edu.uci.ics.texera.workflow.operators.intervalJoin.IntervalJoinOpDesc import edu.uci.ics.texera.workflow.operators.keywordSearch.KeywordSearchOpDesc @@ -31,18 +31,12 @@ import edu.uci.ics.texera.workflow.operators.sentiment.SentimentAnalysisOpDesc import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc import edu.uci.ics.texera.workflow.operators.sortPartitions.SortPartitionsOpDesc import edu.uci.ics.texera.workflow.operators.source.apis.reddit.RedditSearchSourceOpDesc -import edu.uci.ics.texera.workflow.operators.source.apis.twitter.v2.{ - TwitterFullArchiveSearchSourceOpDesc, - TwitterSearchSourceOpDesc -} +import edu.uci.ics.texera.workflow.operators.source.apis.twitter.v2.{TwitterFullArchiveSearchSourceOpDesc, TwitterSearchSourceOpDesc} import edu.uci.ics.texera.workflow.operators.source.fetcher.URLFetcherOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.csv.CSVScanSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.csvOld.CSVOldScanSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.json.JSONLScanSourceOpDesc -import edu.uci.ics.texera.workflow.operators.source.scan.text.{ - TextInputSourceOpDesc, - TextScanSourceOpDesc -} +import edu.uci.ics.texera.workflow.operators.source.scan.text.{TextInputSourceOpDesc, TextScanSourceOpDesc} import edu.uci.ics.texera.workflow.operators.source.sql.asterixdb.AsterixDBSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.sql.mysql.MySQLSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.sql.postgresql.PostgreSQLSourceOpDesc @@ -50,11 +44,7 @@ import edu.uci.ics.texera.workflow.operators.split.SplitOpDesc import edu.uci.ics.texera.workflow.operators.symmetricDifference.SymmetricDifferenceOpDesc import edu.uci.ics.texera.workflow.operators.typecasting.TypeCastingOpDesc import edu.uci.ics.texera.workflow.operators.udf.python.source.PythonUDFSourceOpDescV2 -import edu.uci.ics.texera.workflow.operators.udf.python.{ - 
DualInputPortsPythonUDFOpDescV2, - PythonLambdaFunctionOpDesc, - PythonUDFOpDescV2 -} +import edu.uci.ics.texera.workflow.operators.udf.python.{DualInputPortsPythonUDFOpDescV2, PythonLambdaFunctionOpDesc, PythonUDFOpDescV2} import edu.uci.ics.texera.workflow.operators.union.UnionOpDesc import edu.uci.ics.texera.workflow.operators.unneststring.UnnestStringOpDesc import edu.uci.ics.texera.workflow.operators.visualization.barChart.BarChartOpDesc @@ -120,6 +110,7 @@ trait StateTransferFunc new Type(value = classOf[RandomKSamplingOpDesc], name = "RandomKSampling"), new Type(value = classOf[ReservoirSamplingOpDesc], name = "ReservoirSampling"), new Type(value = classOf[HashJoinOpDesc[String]], name = "HashJoin"), + new Type(value = classOf[IncrementalJoinOpDesc[String]], name = "IncrementalJoin"), new Type(value = classOf[DistinctOpDesc], name = "Distinct"), new Type(value = classOf[IntersectOpDesc], name = "Intersect"), new Type(value = classOf[SymmetricDifferenceOpDesc], name = "SymmetricDifference"), diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpDesc.scala index 6ea5e86d0d7..e3704e5012d 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpDesc.scala @@ -24,7 +24,7 @@ class ConsolidateOpDesc extends OperatorDescriptor{ override def getOutputSchema(schemas: Array[Schema]): Schema = { val newAttrs = asScalaBuffer(schemas(0).getAttributes) - .filter(attr => attr == ProgressiveUtils.insertRetractFlagAttr) + .filter(attr => attr != ProgressiveUtils.insertRetractFlagAttr) Schema.newBuilder().add(newAttrs.toArray :_*).build() } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/ProgressiveRetractionEnforcer.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/ProgressiveRetractionEnforcer.scala index 5c61e51bad5..e519af389e8 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/ProgressiveRetractionEnforcer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/ProgressiveRetractionEnforcer.scala @@ -1,13 +1,13 @@ package edu.uci.ics.texera.workflow.common.workflow -import edu.uci.ics.texera.workflow.common.ProgressiveUtils +import edu.uci.ics.texera.workflow.common.{ProgressiveUtils, WorkflowContext} import edu.uci.ics.texera.workflow.common.operators.consolidate.ConsolidateOpDesc import scala.collection.mutable.ArrayBuffer object ProgressiveRetractionEnforcer { - def enforceDelta(logicalPlan: LogicalPlan): LogicalPlan = { + def enforceDelta(logicalPlan: LogicalPlan, context: WorkflowContext): LogicalPlan = { // first find the edges that we need to add the consolidate operator val edgesToAddConsolidateOp = new ArrayBuffer[OperatorLink]() logicalPlan.outputSchemaMap.foreach(kv => { @@ -28,6 +28,7 @@ object ProgressiveRetractionEnforcer { var resultPlan = logicalPlan edgesToAddConsolidateOp.foreach(edge => { val newOp = new ConsolidateOpDesc() + newOp.setContext(context) resultPlan = resultPlan.removeEdge(edge) resultPlan = resultPlan.addOperator(newOp) .addEdge(edge.origin.operatorID, newOp.operatorID, edge.origin.portOrdinal, 0) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala 
b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala index f889ad956f2..49447751b20 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala @@ -85,7 +85,7 @@ class WorkflowCompiler(val logicalPlan: LogicalPlan, val context: WorkflowContex assignSinkStorage(logicalPlan1, opResultStorage, opsToReuseCache) // add necessary consolidate operator if an operator can't handle retractable inputs - val logicalPlan2 = ProgressiveRetractionEnforcer.enforceDelta(logicalPlan1) + val logicalPlan2 = ProgressiveRetractionEnforcer.enforceDelta(logicalPlan1, context) // convert to physical plan diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala new file mode 100644 index 00000000000..20e34e37b40 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala @@ -0,0 +1,83 @@ +package edu.uci.ics.texera.workflow.operators.hashJoin + +import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} +import com.google.common.base.Preconditions +import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle} +import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecConfig +import edu.uci.ics.texera.workflow.common.metadata.annotations.{AutofillAttributeName, AutofillAttributeNameOnPort1} +import edu.uci.ics.texera.workflow.common.metadata.{InputPort, OperatorGroupConstants, OperatorInfo, OutputPort} +import edu.uci.ics.texera.workflow.common.operators.OperatorDescriptor +import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, OperatorSchemaInfo, Schema} +import edu.uci.ics.texera.workflow.common.workflow.{HashPartition, PartitionInfo} + +import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable` + +@JsonSchemaInject(json = """ +{ + "attributeTypeRules": { + "buildAttributeName": { + "const": { + "$data": "probeAttributeName" + } + } + } +} +""") +class IncrementalJoinOpDesc[K] extends OperatorDescriptor { + + @JsonProperty(required = true) + @JsonSchemaTitle("Left Input Attribute") + @JsonPropertyDescription("attribute to be joined on the Left Input") + @AutofillAttributeName + var leftAttributeName: String = _ + + @JsonProperty(required = true) + @JsonSchemaTitle("Right Input Attribute") + @JsonPropertyDescription("attribute to be joined on the Right Input") + @AutofillAttributeNameOnPort1 + var rightAttributeName: String = _ + + // incremental inner join can reuse some logic from hash join + lazy val hashJoinOpDesc: HashJoinOpDesc[K] = { + val op = new HashJoinOpDesc[K] + op.buildAttributeName = leftAttributeName + op.probeAttributeName = rightAttributeName + op.joinType = JoinType.INNER + op + } + + override def operatorExecutor(operatorSchemaInfo: OperatorSchemaInfo) = { + val hashJoinOpExec = hashJoinOpDesc.operatorExecutor(operatorSchemaInfo) + + OpExecConfig + .oneToOneLayer( + operatorIdentifier, + _ => + new IncrementalJoinOpExec[K]( + leftAttributeName, + rightAttributeName, + operatorSchemaInfo + ) + ) + .copy( + inputPorts = operatorInfo.inputPorts, + outputPorts = operatorInfo.outputPorts, + partitionRequirement = hashJoinOpExec.partitionRequirement, + derivePartition = hashJoinOpExec.derivePartition, + ) + 
} + + override def operatorInfo: OperatorInfo = + OperatorInfo( + "Progressive Inner Join", + "join two inputs", + OperatorGroupConstants.JOIN_GROUP, + inputPorts = List(InputPort("left"), InputPort("right")), + outputPorts = List(OutputPort()) + ) + + // remove the probe attribute in the output + override def getOutputSchema(schemas: Array[Schema]): Schema = { + hashJoinOpDesc.getOutputSchema(schemas) + } +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpExec.scala new file mode 100644 index 00000000000..aec5ab91367 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpExec.scala @@ -0,0 +1,123 @@ +package edu.uci.ics.texera.workflow.operators.hashJoin + +import edu.uci.ics.amber.engine.architecture.worker.PauseManager +import edu.uci.ics.amber.engine.common.InputExhausted +import edu.uci.ics.amber.engine.common.amberexception.WorkflowRuntimeException +import edu.uci.ics.amber.engine.common.rpc.AsyncRPCClient +import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor +import edu.uci.ics.texera.workflow.common.tuple.Tuple +import edu.uci.ics.texera.workflow.common.tuple.Tuple.BuilderV2 +import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, OperatorSchemaInfo, Schema} + +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer + +class IncrementalJoinOpExec[K]( + val buildAttributeName: String, + val probeAttributeName: String, + val operatorSchemaInfo: OperatorSchemaInfo +) extends OperatorExecutor { + + val leftSchema: Schema = operatorSchemaInfo.inputSchemas(0) + val rightSchema: Schema = operatorSchemaInfo.inputSchemas(1) + + val leftTuples = new mutable.HashMap[K, (ArrayBuffer[Tuple], Boolean)]() + val rightTuples = new mutable.HashMap[K, (ArrayBuffer[Tuple], Boolean)]() + + override def processTexeraTuple( + tuple: Either[Tuple, InputExhausted], + input: Int, + pauseManager: PauseManager, + asyncRPCClient: AsyncRPCClient + ): Iterator[Tuple] = { + tuple match { + case Left(tuple) => + if (input == 0) { + // left input, find on the right + val key = tuple.getField(buildAttributeName).asInstanceOf[K] + val (matchedTuples, _) = + rightTuples.getOrElse(key, (new ArrayBuffer[Tuple](), false)) + val returnIter = matchedTuples.map(right => { + join(tuple, right) + }).iterator + building(tuple, input) + returnIter + } else { + // right input, find on the left + val key = tuple.getField(probeAttributeName).asInstanceOf[K] + val (matchedTuples, _) = + leftTuples.getOrElse(key, (new ArrayBuffer[Tuple](), false)) + val returnIter = matchedTuples.map(left => { + join(left, tuple) + }).iterator + building(tuple, input) + returnIter + } + case Right(_) => + Iterator() + } + } + + private def join(left: Tuple, right: Tuple): Tuple = { + val builder = Tuple + .newBuilder(operatorSchemaInfo.outputSchemas(0)) + .add(left) + + fillNonJoinFields( + builder, + rightSchema, + right.getFields.toArray(), + resolveDuplicateName = true + ) + + builder.build() + } + + def fillNonJoinFields( + builder: BuilderV2, + schema: Schema, + fields: Array[Object], + resolveDuplicateName: Boolean = false + ): Unit = { + schema.getAttributesScala.filter(attribute => attribute.getName != probeAttributeName) map { + (attribute: Attribute) => + { + val field = fields.apply(schema.getIndex(attribute.getName)) + if (resolveDuplicateName) { + val attributeName = 
attribute.getName + builder.add( + new Attribute( + if (leftSchema.getAttributeNames.contains(attributeName)) + attributeName + "#@1" + else attributeName, + attribute.getType + ), + field + ) + } else { + builder.add(attribute, field) + } + } + } + } + + private def building(tuple: Tuple, input: Int): Unit = { + val key = tuple.getField(buildAttributeName).asInstanceOf[K] + if (input == 0) { + val (storedTuples, _) = + leftTuples.getOrElseUpdate(key, (new ArrayBuffer[Tuple](), false)) + storedTuples += tuple + } else { + val (storedTuples, _) = + rightTuples.getOrElseUpdate(key, (new ArrayBuffer[Tuple](), false)) + storedTuples += tuple + } + } + + override def open(): Unit = { + } + + override def close(): Unit = { + } + +} From eb3402bd38a63d65ae5b3d77aedf9f2141661b31 Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Mon, 10 Jul 2023 10:49:36 -0700 Subject: [PATCH 11/18] wip --- .../common/workflow/MaterializationRewriter.scala | 8 +++++++- .../operators/hashJoin/IncrementalJoinOpDesc.scala | 8 +++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/MaterializationRewriter.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/MaterializationRewriter.scala index 4c72bd2d1ca..41fc294480a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/MaterializationRewriter.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/MaterializationRewriter.scala @@ -2,7 +2,9 @@ package edu.uci.ics.texera.workflow.common.workflow import com.typesafe.scalalogging.LazyLogging import edu.uci.ics.amber.engine.common.virtualidentity.LinkIdentity +import edu.uci.ics.amber.engine.common.virtualidentity.util.SOURCE_STARTER_OP import edu.uci.ics.texera.workflow.common.WorkflowContext +import edu.uci.ics.texera.workflow.common.metadata.InputPort import edu.uci.ics.texera.workflow.common.operators.source.SourceOperatorDescriptor import edu.uci.ics.texera.workflow.common.storage.OpResultStorage import edu.uci.ics.texera.workflow.common.tuple.schema.{OperatorSchemaInfo, Schema} @@ -63,10 +65,14 @@ class MaterializationRewriter( materializationReader.setContext(context) materializationReader.schema = materializationWriter.getStorage.getSchema val matReaderOutputSchema = materializationReader.getOutputSchemas(Array()) - val matReaderOpExecConfig = + var matReaderOpExecConfig = materializationReader.operatorExecutor( OperatorSchemaInfo(Array(), matReaderOutputSchema) ) + matReaderOpExecConfig = matReaderOpExecConfig.copy( + inputPorts = List(InputPort()), + inputToOrdinalMapping = Map(LinkIdentity(SOURCE_STARTER_OP, matReaderOpExecConfig.id) -> 0) + ) newPlan = newPlan .addOperator(matWriterOpExecConfig) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala index 20e34e37b40..ff20f5a3106 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala @@ -1,6 +1,6 @@ package edu.uci.ics.texera.workflow.operators.hashJoin -import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} +import com.fasterxml.jackson.annotation.{JsonIgnore, JsonProperty, JsonPropertyDescription} import com.google.common.base.Preconditions import 
com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle} import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecConfig @@ -15,9 +15,9 @@ import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable` @JsonSchemaInject(json = """ { "attributeTypeRules": { - "buildAttributeName": { + "leftAttributeName": { "const": { - "$data": "probeAttributeName" + "$data": "rightAttributeName" } } } @@ -38,6 +38,7 @@ class IncrementalJoinOpDesc[K] extends OperatorDescriptor { var rightAttributeName: String = _ // incremental inner join can reuse some logic from hash join + @JsonIgnore lazy val hashJoinOpDesc: HashJoinOpDesc[K] = { val op = new HashJoinOpDesc[K] op.buildAttributeName = leftAttributeName @@ -47,6 +48,7 @@ class IncrementalJoinOpDesc[K] extends OperatorDescriptor { } override def operatorExecutor(operatorSchemaInfo: OperatorSchemaInfo) = { + hashJoinOpDesc.setContext(this.context) val hashJoinOpExec = hashJoinOpDesc.operatorExecutor(operatorSchemaInfo) OpExecConfig From 8563bd9c528cb6a746d50cde5d772c8c108a3460 Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Mon, 25 Sep 2023 23:15:41 -0700 Subject: [PATCH 12/18] clean up --- .../architecture/worker/DataProcessor.scala | 3 +- .../event/WebResultUpdateEvent.scala | 2 +- .../event/WorkflowAvailableResultEvent.scala | 2 +- .../texera/web/service/JobResultService.scala | 61 ++---------------- .../metadata/OperatorMetadataGenerator.scala | 2 +- .../common/operators/OperatorDescriptor.scala | 16 ++++- .../aggregate/FinalAggregateOpExec.scala | 17 +---- .../aggregate/PartialAggregateOpExec.scala | 10 ++- .../consolidate/ConsolidateOpDesc.scala | 13 ++-- .../consolidate/ConsolidateOpExec.scala | 7 ++- .../common/workflow/LogicalPlan.scala | 4 -- .../ProgressiveRetractionEnforcer.scala | 25 ++++---- .../SpecializedAggregateOpDesc.scala | 7 ++- .../dictionary/DictionaryMatcherOpDesc.scala | 2 +- .../hashJoin/IncrementalJoinOpDesc.scala | 14 ++++- .../hashJoin/IncrementalJoinOpExec.scala | 62 ++++++++++--------- .../keywordSearch/KeywordSearchOpDesc.scala | 2 +- .../projection/ProjectionOpDesc.scala | 2 +- .../operators/regex/RegexOpDesc.scala | 2 +- .../sentiment/SentimentAnalysisOpDesc.scala | 2 +- .../operators/union/UnionOpDesc.scala | 2 +- .../unneststring/UnnestStringOpDesc.scala | 2 +- .../lineChart/LineChartOpDesc.scala | 23 +++++-- .../navigation/navigation.component.html | 26 +------- .../context-menu/context-menu.component.html | 20 ------ .../workflow-editor.component.ts | 14 ----- .../service/joint-ui/joint-ui.service.ts | 10 --- .../operator-menu/operator-menu.service.ts | 38 ------------ .../validation/validation-workflow.service.ts | 1 - .../workflow-graph/model/workflow-graph.ts | 54 ---------------- 30 files changed, 135 insertions(+), 310 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index 9f4e935b6f2..616438633a0 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -192,7 +192,7 @@ class DataProcessor( // dependencies: } catch safely { case e => // forward input tuple to the user and pause DP thread - handleOperatorException(e) + handleOperatorException(e) } outputIterator } @@ -292,7 +292,6 @@ class DataProcessor( // dependencies: } private[this] def 
handleOperatorException(e: Throwable): Unit = { - e.printStackTrace() if (currentInputTuple.isLeft) { asyncRPCClient.send( LocalOperatorException(currentInputTuple.left.get, e), diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WebResultUpdateEvent.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WebResultUpdateEvent.scala index 6791eb1dc4f..d30390978b0 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WebResultUpdateEvent.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WebResultUpdateEvent.scala @@ -1,5 +1,5 @@ package edu.uci.ics.texera.web.model.websocket.event -import edu.uci.ics.texera.web.service.WebResultUpdate.WebResultUpdate +import edu.uci.ics.texera.web.service.JobResultService.WebResultUpdate case class WebResultUpdateEvent(updates: Map[String, WebResultUpdate]) extends TexeraWebSocketEvent diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WorkflowAvailableResultEvent.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WorkflowAvailableResultEvent.scala index 9f71efa134b..d57bbe46403 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WorkflowAvailableResultEvent.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/event/WorkflowAvailableResultEvent.scala @@ -1,7 +1,7 @@ package edu.uci.ics.texera.web.model.websocket.event import edu.uci.ics.texera.web.model.websocket.event.WorkflowAvailableResultEvent.OperatorAvailableResult -import edu.uci.ics.texera.web.service.WebResultUpdate.WebOutputMode +import edu.uci.ics.texera.web.service.JobResultService.WebOutputMode object WorkflowAvailableResultEvent { case class OperatorAvailableResult( diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala index c46c7d1b160..84bd07d7277 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/JobResultService.scala @@ -26,7 +26,6 @@ import edu.uci.ics.texera.web.workflowruntimestate.JobMetadataStore import edu.uci.ics.texera.web.workflowruntimestate.WorkflowAggregatedState.RUNNING import edu.uci.ics.texera.web.{SubscriptionManager, TexeraWebApplication} import edu.uci.ics.texera.workflow.common.IncrementalOutputMode -import edu.uci.ics.texera.workflow.common.IncrementalOutputMode.{SET_DELTA, SET_SNAPSHOT} import edu.uci.ics.texera.workflow.common.storage.OpResultStorage import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.workflow.LogicalPlan @@ -36,7 +35,10 @@ import java.util.UUID import scala.collection.mutable import scala.concurrent.duration.DurationInt -object WebResultUpdate { +object JobResultService { + + val defaultPageSize: Int = 5 + // convert Tuple from engine's format to JSON format def webDataFromTuple( mode: WebOutputMode, @@ -144,61 +146,6 @@ object WebResultUpdate { case class WebDataUpdate(mode: WebOutputMode, table: List[ObjectNode], chartType: Option[String]) extends WebResultUpdate - - /** - * For SET_SNAPSHOT output mode: result is the latest snapshot - * FOR SET_DELTA output mode: - * - for insert-only delta: effectively the same as latest snapshot - * - for insert-retract delta: the union of all delta outputs, not compacted to a snapshot - * - * Produces the 
WebResultUpdate to send to frontend from a result update from the engine. - */ - def convertWebResultUpdate( - sink: ProgressiveSinkOpDesc, - oldTupleCount: Int, - newTupleCount: Int - ): WebResultUpdate = { - val webOutputMode: WebOutputMode = { - (sink.getOutputMode, sink.getChartType) match { - // visualization sinks use its corresponding mode - case (SET_SNAPSHOT, Some(_)) => SetSnapshotMode() - case (SET_DELTA, Some(_)) => SetDeltaMode() - // Non-visualization sinks use pagination mode - case (_, None) => PaginationMode() - } - } - - val storage = sink.getStorage - val webUpdate = (webOutputMode, sink.getOutputMode) match { - case (PaginationMode(), SET_SNAPSHOT) => - val numTuples = storage.getCount - val maxPageIndex = Math.ceil(numTuples / JobResultService.defaultPageSize.toDouble).toInt - WebPaginationUpdate( - PaginationMode(), - newTupleCount, - (1 to maxPageIndex).toList - ) - case (SetSnapshotMode(), SET_SNAPSHOT) => - webDataFromTuple(webOutputMode, storage.getAll.toList, sink.getChartType) - case (SetDeltaMode(), SET_DELTA) => - val deltaList = storage.getAllAfter(oldTupleCount).toList - webDataFromTuple(webOutputMode, deltaList, sink.getChartType) - - // currently not supported mode combinations - // (PaginationMode, SET_DELTA) | (DataSnapshotMode, SET_DELTA) | (DataDeltaMode, SET_SNAPSHOT) - case _ => - throw new RuntimeException( - "update mode combination not supported: " + (webOutputMode, sink.getOutputMode) - ) - } - webUpdate - } -} - -object JobResultService { - - val defaultPageSize: Int = 5 - } /** diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/metadata/OperatorMetadataGenerator.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/metadata/OperatorMetadataGenerator.scala index 6162e5ffbd3..b61461a9947 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/metadata/OperatorMetadataGenerator.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/metadata/OperatorMetadataGenerator.scala @@ -33,7 +33,7 @@ case class OperatorInfo( dynamicOutputPorts: Boolean = false, supportReconfiguration: Boolean = false, allowPortCustomization: Boolean = false, - supportRetractableInput: Boolean = false, + supportRetractableInput: Boolean = false ) case class OperatorMetadata( diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala index 91a7565b365..1eee6ff14c6 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala @@ -17,7 +17,11 @@ import edu.uci.ics.texera.workflow.operators.difference.DifferenceOpDesc import edu.uci.ics.texera.workflow.operators.distinct.DistinctOpDesc import edu.uci.ics.texera.workflow.operators.download.BulkDownloaderOpDesc import edu.uci.ics.texera.workflow.operators.filter.SpecializedFilterOpDesc -import edu.uci.ics.texera.workflow.operators.hashJoin.{HashJoinOpDesc, IncrementalJoinOpDesc, IncrementalJoinOpExec} +import edu.uci.ics.texera.workflow.operators.hashJoin.{ + HashJoinOpDesc, + IncrementalJoinOpDesc, + IncrementalJoinOpExec +} import edu.uci.ics.texera.workflow.operators.intersect.IntersectOpDesc import edu.uci.ics.texera.workflow.operators.intervalJoin.IntervalJoinOpDesc import edu.uci.ics.texera.workflow.operators.keywordSearch.KeywordSearchOpDesc @@ -31,12 +35,18 @@ 
import edu.uci.ics.texera.workflow.operators.sentiment.SentimentAnalysisOpDesc import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc import edu.uci.ics.texera.workflow.operators.sortPartitions.SortPartitionsOpDesc import edu.uci.ics.texera.workflow.operators.source.apis.reddit.RedditSearchSourceOpDesc -import edu.uci.ics.texera.workflow.operators.source.apis.twitter.v2.{TwitterFullArchiveSearchSourceOpDesc, TwitterSearchSourceOpDesc} +import edu.uci.ics.texera.workflow.operators.source.apis.twitter.v2.{ + TwitterFullArchiveSearchSourceOpDesc, + TwitterSearchSourceOpDesc +} import edu.uci.ics.texera.workflow.operators.source.fetcher.URLFetcherOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.csv.CSVScanSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.csvOld.CSVOldScanSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.json.JSONLScanSourceOpDesc -import edu.uci.ics.texera.workflow.operators.source.scan.text.{TextInputSourceOpDesc, TextScanSourceOpDesc} +import edu.uci.ics.texera.workflow.operators.source.scan.text.{ + TextInputSourceOpDesc, + TextScanSourceOpDesc +} import edu.uci.ics.texera.workflow.operators.source.sql.asterixdb.AsterixDBSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.sql.mysql.MySQLSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.sql.postgresql.PostgreSQLSourceOpDesc diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/FinalAggregateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/FinalAggregateOpExec.scala index 7f0513598f1..58b1edb8ded 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/FinalAggregateOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/FinalAggregateOpExec.scala @@ -11,9 +11,6 @@ import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, OperatorSchem import scala.collection.mutable -//import scala.collection.mutable -//import scala.collection.mutable.ArrayBuffer - class FinalAggregateOpExec( val aggFuncs: List[DistributedAggregation[Object]], val groupByKeys: List[String], @@ -59,15 +56,6 @@ class FinalAggregateOpExec( case Right(_) => outputDiff() -// partialObjectsPerKey.iterator.map(pair => { -// val finalAggValues = aggFuncs.indices.map(i => aggFuncs(i).finalAgg(pair._2(i))) -// -// val tupleBuilder = Tuple.newBuilder(operatorSchemaInfo.outputSchemas(0)) -// // add group by keys and final agg values -// tupleBuilder.addSequentially((pair._1 ++ finalAggValues).toArray) -// -// tupleBuilder.build() -// }) } } @@ -87,7 +75,7 @@ class FinalAggregateOpExec( val insertions = new mutable.ArrayBuffer[Tuple]() partialObjectsPerKey.keySet.foreach(k => { - if (! 
previousAggResults.contains(k)) { + if (!previousAggResults.contains(k)) { val newFields = finalAggregate(k, partialObjectsPerKey(k)) insertions.append(addInsertionFlag(newFields, outputSchema)) } else if (previousAggResults(k) != partialObjectsPerKey(k)) { @@ -99,9 +87,6 @@ class FinalAggregateOpExec( }) val results = retractions ++ insertions - results.foreach(r => { - System.err.println("aggResult: " + r.getFields) - }) results.iterator } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/PartialAggregateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/PartialAggregateOpExec.scala index 2b4c5545f45..5a554171615 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/PartialAggregateOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/PartialAggregateOpExec.scala @@ -6,7 +6,12 @@ import edu.uci.ics.amber.engine.common.rpc.AsyncRPCClient import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.operators.aggregate.PartialAggregateOpExec.internalAggObjKey import edu.uci.ics.texera.workflow.common.tuple.Tuple -import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, AttributeType, OperatorSchemaInfo, Schema} +import edu.uci.ics.texera.workflow.common.tuple.schema.{ + Attribute, + AttributeType, + OperatorSchemaInfo, + Schema +} import java.util.Collections import scala.collection.{JavaConverters, mutable} @@ -74,8 +79,7 @@ class PartialAggregateOpExec( val resultIterator = getPartialOutputs() this.partialObjectsPerKey = new mutable.HashMap[List[Object], List[Object]]() resultIterator - } - else Iterator() + } else Iterator() case Right(_) => val resultIterator = getPartialOutputs() this.partialObjectsPerKey = new mutable.HashMap[List[Object], List[Object]]() diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpDesc.scala index e3704e5012d..34f6b8561df 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpDesc.scala @@ -2,7 +2,12 @@ package edu.uci.ics.texera.workflow.common.operators.consolidate import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecConfig import edu.uci.ics.texera.workflow.common.ProgressiveUtils -import edu.uci.ics.texera.workflow.common.metadata.{InputPort, OperatorGroupConstants, OperatorInfo, OutputPort} +import edu.uci.ics.texera.workflow.common.metadata.{ + InputPort, + OperatorGroupConstants, + OperatorInfo, + OutputPort +} import edu.uci.ics.texera.workflow.common.operators.OperatorDescriptor import edu.uci.ics.texera.workflow.common.tuple.schema.{OperatorSchemaInfo, Schema} import edu.uci.ics.texera.workflow.operators.difference.DifferenceOpExec @@ -10,7 +15,7 @@ import edu.uci.ics.texera.workflow.operators.difference.DifferenceOpExec import scala.collection.JavaConverters.asScalaBuffer import scala.collection.immutable.List -class ConsolidateOpDesc extends OperatorDescriptor{ +class ConsolidateOpDesc extends OperatorDescriptor { override def operatorInfo: OperatorInfo = { OperatorInfo( "Consolidate", @@ -18,14 +23,14 @@ class ConsolidateOpDesc extends OperatorDescriptor{ 
OperatorGroupConstants.UTILITY_GROUP, List(InputPort("")), List(OutputPort("")), - supportRetractableInput = true, + supportRetractableInput = true ) } override def getOutputSchema(schemas: Array[Schema]): Schema = { val newAttrs = asScalaBuffer(schemas(0).getAttributes) .filter(attr => attr != ProgressiveUtils.insertRetractFlagAttr) - Schema.newBuilder().add(newAttrs.toArray :_*).build() + Schema.newBuilder().add(newAttrs.toArray: _*).build() } override def operatorExecutor(operatorSchemaInfo: OperatorSchemaInfo): OpExecConfig = { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpExec.scala index 424f6638fdf..4674ee8958b 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpExec.scala @@ -14,7 +14,12 @@ class ConsolidateOpExec(operatorSchemaInfo: OperatorSchemaInfo) extends Operator private val results = new ArrayBuffer[Tuple]() - override def processTexeraTuple(tuple: Either[Tuple, InputExhausted], input: Int, pauseManager: PauseManager, asyncRPCClient: AsyncRPCClient): Iterator[Tuple] = { + override def processTexeraTuple( + tuple: Either[Tuple, InputExhausted], + input: Int, + pauseManager: PauseManager, + asyncRPCClient: AsyncRPCClient + ): Iterator[Tuple] = { tuple match { case Left(t) => diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala index c5faa8134bb..8b18a1cbbfb 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/LogicalPlan.scala @@ -291,8 +291,4 @@ case class LogicalPlan( physicalPlan } -// def normalize(): LogicalPlan = { -// SinkInjectionTransformer.transform(this) -// } - } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/ProgressiveRetractionEnforcer.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/ProgressiveRetractionEnforcer.scala index e519af389e8..6103f228e0d 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/ProgressiveRetractionEnforcer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/ProgressiveRetractionEnforcer.scala @@ -13,16 +13,19 @@ object ProgressiveRetractionEnforcer { logicalPlan.outputSchemaMap.foreach(kv => { val op = kv._1 val outSchemas = kv._2 - logicalPlan.getDownstreamEdges(op.operator).zip(outSchemas).foreach(out => { - val outEdge = out._1 - val outSchema = out._2 - if (outSchema.containsAttribute(ProgressiveUtils.insertRetractFlagAttr.getName)) { - val downstreamOp = logicalPlan.getOperator(outEdge.destination.operatorID) - if (! 
downstreamOp.operatorInfo.supportRetractableInput) { - edgesToAddConsolidateOp.append(outEdge) + logicalPlan + .getDownstreamEdges(op.operator) + .zip(outSchemas) + .foreach(out => { + val outEdge = out._1 + val outSchema = out._2 + if (outSchema.containsAttribute(ProgressiveUtils.insertRetractFlagAttr.getName)) { + val downstreamOp = logicalPlan.getOperator(outEdge.destination.operatorID) + if (!downstreamOp.operatorInfo.supportRetractableInput) { + edgesToAddConsolidateOp.append(outEdge) + } } - } - }) + }) }) var resultPlan = logicalPlan @@ -30,7 +33,8 @@ object ProgressiveRetractionEnforcer { val newOp = new ConsolidateOpDesc() newOp.setContext(context) resultPlan = resultPlan.removeEdge(edge) - resultPlan = resultPlan.addOperator(newOp) + resultPlan = resultPlan + .addOperator(newOp) .addEdge(edge.origin.operatorID, newOp.operatorID, edge.origin.portOrdinal, 0) .addEdge(newOp.operatorID, edge.destination.operatorID, 0, edge.destination.portOrdinal) }) @@ -38,5 +42,4 @@ object ProgressiveRetractionEnforcer { resultPlan } - } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/aggregate/SpecializedAggregateOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/aggregate/SpecializedAggregateOpDesc.scala index 2d0dd461dca..cbd94e4e905 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/aggregate/SpecializedAggregateOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/aggregate/SpecializedAggregateOpDesc.scala @@ -4,7 +4,12 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle import edu.uci.ics.texera.workflow.common.ProgressiveUtils import edu.uci.ics.texera.workflow.common.metadata.annotations.AutofillAttributeNameList -import edu.uci.ics.texera.workflow.common.metadata.{InputPort, OperatorGroupConstants, OperatorInfo, OutputPort} +import edu.uci.ics.texera.workflow.common.metadata.{ + InputPort, + OperatorGroupConstants, + OperatorInfo, + OutputPort +} import edu.uci.ics.texera.workflow.common.operators.aggregate.AggregateOpDesc import edu.uci.ics.texera.workflow.common.tuple.schema.{OperatorSchemaInfo, Schema} import edu.uci.ics.texera.workflow.common.workflow.PhysicalPlan diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/dictionary/DictionaryMatcherOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/dictionary/DictionaryMatcherOpDesc.scala index 62e7da505b0..4bc7a6b1d03 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/dictionary/DictionaryMatcherOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/dictionary/DictionaryMatcherOpDesc.scala @@ -45,7 +45,7 @@ class DictionaryMatcherOpDesc extends MapOpDesc { inputPorts = List(InputPort()), outputPorts = List(OutputPort()), supportReconfiguration = true, - supportRetractableInput = true, + supportRetractableInput = true ) override def getOutputSchema(schemas: Array[Schema]): Schema = { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala index ff20f5a3106..9e3de488088 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala +++ 
b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala @@ -4,8 +4,16 @@ import com.fasterxml.jackson.annotation.{JsonIgnore, JsonProperty, JsonPropertyD import com.google.common.base.Preconditions import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle} import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecConfig -import edu.uci.ics.texera.workflow.common.metadata.annotations.{AutofillAttributeName, AutofillAttributeNameOnPort1} -import edu.uci.ics.texera.workflow.common.metadata.{InputPort, OperatorGroupConstants, OperatorInfo, OutputPort} +import edu.uci.ics.texera.workflow.common.metadata.annotations.{ + AutofillAttributeName, + AutofillAttributeNameOnPort1 +} +import edu.uci.ics.texera.workflow.common.metadata.{ + InputPort, + OperatorGroupConstants, + OperatorInfo, + OutputPort +} import edu.uci.ics.texera.workflow.common.operators.OperatorDescriptor import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, OperatorSchemaInfo, Schema} import edu.uci.ics.texera.workflow.common.workflow.{HashPartition, PartitionInfo} @@ -65,7 +73,7 @@ class IncrementalJoinOpDesc[K] extends OperatorDescriptor { inputPorts = operatorInfo.inputPorts, outputPorts = operatorInfo.outputPorts, partitionRequirement = hashJoinOpExec.partitionRequirement, - derivePartition = hashJoinOpExec.derivePartition, + derivePartition = hashJoinOpExec.derivePartition ) } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpExec.scala index aec5ab91367..650ad808bc4 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpExec.scala @@ -37,9 +37,11 @@ class IncrementalJoinOpExec[K]( val key = tuple.getField(buildAttributeName).asInstanceOf[K] val (matchedTuples, _) = rightTuples.getOrElse(key, (new ArrayBuffer[Tuple](), false)) - val returnIter = matchedTuples.map(right => { - join(tuple, right) - }).iterator + val returnIter = matchedTuples + .map(right => { + join(tuple, right) + }) + .iterator building(tuple, input) returnIter } else { @@ -47,9 +49,11 @@ class IncrementalJoinOpExec[K]( val key = tuple.getField(probeAttributeName).asInstanceOf[K] val (matchedTuples, _) = leftTuples.getOrElse(key, (new ArrayBuffer[Tuple](), false)) - val returnIter = matchedTuples.map(left => { - join(left, tuple) - }).iterator + val returnIter = matchedTuples + .map(left => { + join(left, tuple) + }) + .iterator building(tuple, input) returnIter } @@ -74,30 +78,30 @@ class IncrementalJoinOpExec[K]( } def fillNonJoinFields( - builder: BuilderV2, - schema: Schema, - fields: Array[Object], - resolveDuplicateName: Boolean = false + builder: BuilderV2, + schema: Schema, + fields: Array[Object], + resolveDuplicateName: Boolean = false ): Unit = { schema.getAttributesScala.filter(attribute => attribute.getName != probeAttributeName) map { (attribute: Attribute) => - { - val field = fields.apply(schema.getIndex(attribute.getName)) - if (resolveDuplicateName) { - val attributeName = attribute.getName - builder.add( - new Attribute( - if (leftSchema.getAttributeNames.contains(attributeName)) - attributeName + "#@1" - else attributeName, - attribute.getType - ), - field - ) - } else { - builder.add(attribute, field) + { + val field = 
fields.apply(schema.getIndex(attribute.getName)) + if (resolveDuplicateName) { + val attributeName = attribute.getName + builder.add( + new Attribute( + if (leftSchema.getAttributeNames.contains(attributeName)) + attributeName + "#@1" + else attributeName, + attribute.getType + ), + field + ) + } else { + builder.add(attribute, field) + } } - } } } @@ -114,10 +118,8 @@ class IncrementalJoinOpExec[K]( } } - override def open(): Unit = { - } + override def open(): Unit = {} - override def close(): Unit = { - } + override def close(): Unit = {} } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/keywordSearch/KeywordSearchOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/keywordSearch/KeywordSearchOpDesc.scala index 2c6db894280..fc5bbea134a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/keywordSearch/KeywordSearchOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/keywordSearch/KeywordSearchOpDesc.scala @@ -38,6 +38,6 @@ class KeywordSearchOpDesc extends FilterOpDesc { inputPorts = List(InputPort()), outputPorts = List(OutputPort()), supportReconfiguration = true, - supportRetractableInput = true, + supportRetractableInput = true ) } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/projection/ProjectionOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/projection/ProjectionOpDesc.scala index 04d2328659f..1cf837e79d7 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/projection/ProjectionOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/projection/ProjectionOpDesc.scala @@ -56,7 +56,7 @@ class ProjectionOpDesc extends MapOpDesc { inputPorts = List(InputPort()), outputPorts = List(OutputPort()), supportReconfiguration = false, - supportRetractableInput = true, + supportRetractableInput = true ) } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/regex/RegexOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/regex/RegexOpDesc.scala index e4addb55ddd..21423cbad2d 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/regex/RegexOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/regex/RegexOpDesc.scala @@ -41,6 +41,6 @@ class RegexOpDesc extends FilterOpDesc { inputPorts = List(InputPort()), outputPorts = List(OutputPort()), supportReconfiguration = true, - supportRetractableInput = true, + supportRetractableInput = true ) } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sentiment/SentimentAnalysisOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sentiment/SentimentAnalysisOpDesc.scala index 12029c2fd4f..396374d177f 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sentiment/SentimentAnalysisOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sentiment/SentimentAnalysisOpDesc.scala @@ -54,7 +54,7 @@ class SentimentAnalysisOpDesc extends MapOpDesc { List(InputPort("")), List(OutputPort("")), supportReconfiguration = true, - supportRetractableInput = true, + supportRetractableInput = true ) override def getOutputSchema(schemas: Array[Schema]): Schema = { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/union/UnionOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/union/UnionOpDesc.scala index 
8d12bac88f2..8365ee2a3d9 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/union/UnionOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/union/UnionOpDesc.scala @@ -24,7 +24,7 @@ class UnionOpDesc extends OperatorDescriptor { OperatorGroupConstants.UTILITY_GROUP, inputPorts = List(InputPort(allowMultiInputs = true)), outputPorts = List(OutputPort()), - supportRetractableInput = true, + supportRetractableInput = true ) override def getOutputSchema(schemas: Array[Schema]): Schema = { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/unneststring/UnnestStringOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/unneststring/UnnestStringOpDesc.scala index 94278c0e6e8..8543342ad4e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/unneststring/UnnestStringOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/unneststring/UnnestStringOpDesc.scala @@ -35,7 +35,7 @@ class UnnestStringOpDesc extends FlatMapOpDesc { operatorGroupName = OperatorGroupConstants.UTILITY_GROUP, inputPorts = List(InputPort()), outputPorts = List(OutputPort()), - supportRetractableInput = true, + supportRetractableInput = true ) override def operatorExecutor(operatorSchemaInfo: OperatorSchemaInfo) = { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/lineChart/LineChartOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/lineChart/LineChartOpDesc.scala index 0ec9732351e..0a69b9113b0 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/lineChart/LineChartOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/lineChart/LineChartOpDesc.scala @@ -2,11 +2,26 @@ package edu.uci.ics.texera.workflow.operators.visualization.lineChart import com.fasterxml.jackson.annotation.{JsonIgnore, JsonProperty, JsonPropertyDescription} import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecConfig -import edu.uci.ics.texera.workflow.common.metadata.annotations.{AutofillAttributeName, AutofillAttributeNameList} -import edu.uci.ics.texera.workflow.common.metadata.{InputPort, OperatorGroupConstants, OperatorInfo, OutputPort} +import edu.uci.ics.texera.workflow.common.metadata.annotations.{ + AutofillAttributeName, + AutofillAttributeNameList +} +import edu.uci.ics.texera.workflow.common.metadata.{ + InputPort, + OperatorGroupConstants, + OperatorInfo, + OutputPort +} import edu.uci.ics.texera.workflow.common.tuple.schema.{OperatorSchemaInfo, Schema} -import edu.uci.ics.texera.workflow.operators.aggregate.{AggregationFunction, AggregationOperation, SpecializedAggregateOpDesc} -import edu.uci.ics.texera.workflow.operators.visualization.{VisualizationConstants, VisualizationOperator} +import edu.uci.ics.texera.workflow.operators.aggregate.{ + AggregationFunction, + AggregationOperation, + SpecializedAggregateOpDesc +} +import edu.uci.ics.texera.workflow.operators.visualization.{ + VisualizationConstants, + VisualizationOperator +} import java.util.Collections.singletonList import scala.jdk.CollectionConverters.asScalaBuffer diff --git a/core/new-gui/src/app/workspace/component/navigation/navigation.component.html b/core/new-gui/src/app/workspace/component/navigation/navigation.component.html index 723498d664f..de9a830fea4 100644 --- a/core/new-gui/src/app/workspace/component/navigation/navigation.component.html 
+++ b/core/new-gui/src/app/workspace/component/navigation/navigation.component.html @@ -323,7 +323,7 @@ title="reuse result if possible"> - -
[removed "view result" / "remove view result" menu entries in navigation.component.html and context-menu.component.html]
  • { - event.newViewResultOps.concat(event.newUnviewResultOps).forEach(opID => { - const op = this.workflowActionService.getTexeraGraph().getOperator(opID); - this.jointUIService.changeOperatorViewResultStatus(this.getJointPaper(), op, op.viewResult); - }); - }); - } - private handleViewOperatorResult(): void { this.workflowActionService diff --git a/core/new-gui/src/app/workspace/service/joint-ui/joint-ui.service.ts b/core/new-gui/src/app/workspace/service/joint-ui/joint-ui.service.ts index c4b9b983a81..de10e789b64 100644 --- a/core/new-gui/src/app/workspace/service/joint-ui/joint-ui.service.ts +++ b/core/new-gui/src/app/workspace/service/joint-ui/joint-ui.service.ts @@ -935,16 +935,6 @@ export class JointUIService { width: 40, height: 40, "ref-x": 75, - "ref-y": 20, - ref: "rect.body", - "x-alignment": "middle", - "y-alignment": "middle", - }, - ".texera-operator-view-result-icon": { - "xlink:href": JointUIService.getOperatorViewResultIcon(operator), - width: 20, - height: 20, - "ref-x": 75, "ref-y": 50, ref: "rect.body", "x-alignment": "middle", diff --git a/core/new-gui/src/app/workspace/service/operator-menu/operator-menu.service.ts b/core/new-gui/src/app/workspace/service/operator-menu/operator-menu.service.ts index 20531fffdd5..be1bc34a7c7 100644 --- a/core/new-gui/src/app/workspace/service/operator-menu/operator-menu.service.ts +++ b/core/new-gui/src/app/workspace/service/operator-menu/operator-menu.service.ts @@ -55,9 +55,6 @@ export class OperatorMenuService { public isReuseResultClickable: boolean = false; public isMarkForReuse: boolean = true; - public isReuseResultClickable: boolean = false; - public isMarkForReuse: boolean = true; - public readonly COPY_OFFSET = 20; constructor( @@ -146,20 +143,6 @@ export class OperatorMenuService { } } - public reuseResultHighlightedOperator(): void { - const effectiveHighlightedOperatorsExcludeSink = this.effectivelyHighlightedOperators.value.filter( - op => !isSink(this.workflowActionService.getTexeraGraph().getOperator(op)) - ); - - console.log("calling mark reuse") - console.log(effectiveHighlightedOperatorsExcludeSink) - if (this.isMarkForReuse) { - this.workflowActionService.markReuseResults(effectiveHighlightedOperatorsExcludeSink); - } else { - this.workflowActionService.removeMarkReuseResults(effectiveHighlightedOperatorsExcludeSink); - } - } - /** * Updates the status of the disable operator icon: * If all selected operators are disabled, then click it will re-enable the operators @@ -224,27 +207,6 @@ export class OperatorMenuService { }); } - handleReuseOperatorResultStatusChange() { - merge( - this.effectivelyHighlightedOperators, - this.workflowActionService.getTexeraGraph().getReuseCacheOperatorsChangedStream(), - this.workflowActionService.getWorkflowModificationEnabledStream() - ).subscribe(event => { - const effectiveHighlightedOperatorsExcludeSink = this.effectivelyHighlightedOperators.value.filter( - op => !isSink(this.workflowActionService.getTexeraGraph().getOperator(op)) - ); - - const allMarkedForReuse = effectiveHighlightedOperatorsExcludeSink.every(op => - this.workflowActionService.getTexeraGraph().isMarkedForReuseResult(op) - ); - - this.isMarkForReuse = !allMarkedForReuse; - this.isReuseResultClickable = - effectiveHighlightedOperatorsExcludeSink.length !== 0 && - this.workflowActionService.checkWorkflowModificationEnabled(); - }); - } - /** * saves highlighted elements to the system clipboard */ diff --git a/core/new-gui/src/app/workspace/service/validation/validation-workflow.service.ts 
b/core/new-gui/src/app/workspace/service/validation/validation-workflow.service.ts index 280f6a91c4e..4357c5a3a5d 100644 --- a/core/new-gui/src/app/workspace/service/validation/validation-workflow.service.ts +++ b/core/new-gui/src/app/workspace/service/validation/validation-workflow.service.ts @@ -68,7 +68,6 @@ export class ValidationWorkflowService { this.operatorSchemaList = metadata.operators; this.initializeValidation(); }); - this.getWorkflowValidationErrorStream().subscribe(c => console.log(c)); } public getCurrentWorkflowValidationError(): { diff --git a/core/new-gui/src/app/workspace/service/workflow-graph/model/workflow-graph.ts b/core/new-gui/src/app/workspace/service/workflow-graph/model/workflow-graph.ts index 661c8cda6fd..b182bd291b9 100644 --- a/core/new-gui/src/app/workspace/service/workflow-graph/model/workflow-graph.ts +++ b/core/new-gui/src/app/workspace/service/workflow-graph/model/workflow-graph.ts @@ -536,60 +536,6 @@ export class WorkflowGraph { ); } - /** - * Changes markedForReuse status which is an atomic boolean value as opposed to y-type data. - * @param operatorID - */ - public markReuseResult(operatorID: string): void { - const operator = this.getOperator(operatorID); - if (!operator) { - throw new Error(`operator with ID ${operatorID} doesn't exist`); - } - if (isSink(operator)) { - return; - } - if (this.isMarkedForReuseResult(operatorID)) { - return; - } - console.log("seeting marked for reuse in shared model") - this.sharedModel.operatorIDMap.get(operatorID)?.set("markedForReuse", true); - } - - /** - * Changes markedForReuse status which is an atomic boolean value as opposed to y-type data. - * @param operatorID - */ - public removeMarkReuseResult(operatorID: string): void { - const operator = this.getOperator(operatorID); - if (!operator) { - throw new Error(`operator with ID ${operatorID} doesn't exist`); - } - if (!this.isMarkedForReuseResult(operatorID)) { - return; - } - this.sharedModel.operatorIDMap.get(operatorID)?.set("markedForReuse", false); - } - - /** - * This method gets this status from readonly object version of the operator data as opposed to y-type data. - * @param operatorID - */ - public isMarkedForReuseResult(operatorID: string): boolean { - const operator = this.getOperator(operatorID); - if (!operator) { - throw new Error(`operator with ID ${operatorID} doesn't exist`); - } - return operator.markedForReuse ?? false; - } - - public getOperatorsMarkedForReuseResult(): ReadonlySet { - return new Set( - Array.from(this.sharedModel.operatorIDMap.keys() as IterableIterator).filter(op => - this.isMarkedForReuseResult(op) - ) - ); - } - /** * Returns whether the operator exists in the graph. 
* @param operatorID operator ID From 6ea5dec6d3b6941e2c6a44934167a22d40afa892 Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Mon, 25 Sep 2023 23:24:57 -0700 Subject: [PATCH 13/18] fix format --- .../workflow/common/operators/OperatorDescriptor.scala | 3 +-- .../common/operators/aggregate/PartialAggregateOpExec.scala | 3 +-- .../common/operators/consolidate/ConsolidateOpDesc.scala | 1 - .../workflow/operators/hashJoin/IncrementalJoinOpDesc.scala | 5 +---- .../workflow/operators/hashJoin/IncrementalJoinOpExec.scala | 1 - 5 files changed, 3 insertions(+), 10 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala index 1eee6ff14c6..34b77bbc7ca 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala @@ -19,8 +19,7 @@ import edu.uci.ics.texera.workflow.operators.download.BulkDownloaderOpDesc import edu.uci.ics.texera.workflow.operators.filter.SpecializedFilterOpDesc import edu.uci.ics.texera.workflow.operators.hashJoin.{ HashJoinOpDesc, - IncrementalJoinOpDesc, - IncrementalJoinOpExec + IncrementalJoinOpDesc } import edu.uci.ics.texera.workflow.operators.intersect.IntersectOpDesc import edu.uci.ics.texera.workflow.operators.intervalJoin.IntervalJoinOpDesc diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/PartialAggregateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/PartialAggregateOpExec.scala index 5a554171615..c2c3aa2498e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/PartialAggregateOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/PartialAggregateOpExec.scala @@ -13,8 +13,7 @@ import edu.uci.ics.texera.workflow.common.tuple.schema.{ Schema } -import java.util.Collections -import scala.collection.{JavaConverters, mutable} +import scala.collection.mutable import scala.jdk.CollectionConverters.asJavaIterableConverter object PartialAggregateOpExec { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpDesc.scala index 34f6b8561df..08f079ae926 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/consolidate/ConsolidateOpDesc.scala @@ -10,7 +10,6 @@ import edu.uci.ics.texera.workflow.common.metadata.{ } import edu.uci.ics.texera.workflow.common.operators.OperatorDescriptor import edu.uci.ics.texera.workflow.common.tuple.schema.{OperatorSchemaInfo, Schema} -import edu.uci.ics.texera.workflow.operators.difference.DifferenceOpExec import scala.collection.JavaConverters.asScalaBuffer import scala.collection.immutable.List diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala index 9e3de488088..82e3b3c0045 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala +++ 
b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala @@ -1,7 +1,6 @@ package edu.uci.ics.texera.workflow.operators.hashJoin import com.fasterxml.jackson.annotation.{JsonIgnore, JsonProperty, JsonPropertyDescription} -import com.google.common.base.Preconditions import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle} import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecConfig import edu.uci.ics.texera.workflow.common.metadata.annotations.{ @@ -15,10 +14,8 @@ import edu.uci.ics.texera.workflow.common.metadata.{ OutputPort } import edu.uci.ics.texera.workflow.common.operators.OperatorDescriptor -import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, OperatorSchemaInfo, Schema} -import edu.uci.ics.texera.workflow.common.workflow.{HashPartition, PartitionInfo} +import edu.uci.ics.texera.workflow.common.tuple.schema.{OperatorSchemaInfo, Schema} -import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable` @JsonSchemaInject(json = """ { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpExec.scala index 650ad808bc4..ee38078d3c3 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpExec.scala @@ -2,7 +2,6 @@ package edu.uci.ics.texera.workflow.operators.hashJoin import edu.uci.ics.amber.engine.architecture.worker.PauseManager import edu.uci.ics.amber.engine.common.InputExhausted -import edu.uci.ics.amber.engine.common.amberexception.WorkflowRuntimeException import edu.uci.ics.amber.engine.common.rpc.AsyncRPCClient import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple From 4bc62ab8c76dcac18c60e3c57a5d839523557f97 Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Mon, 25 Sep 2023 23:34:40 -0700 Subject: [PATCH 14/18] format --- .../workflow/common/operators/OperatorDescriptor.scala | 5 +---- .../workflow/operators/hashJoin/IncrementalJoinOpDesc.scala | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala index 34b77bbc7ca..0f882db7bf3 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorDescriptor.scala @@ -17,10 +17,7 @@ import edu.uci.ics.texera.workflow.operators.difference.DifferenceOpDesc import edu.uci.ics.texera.workflow.operators.distinct.DistinctOpDesc import edu.uci.ics.texera.workflow.operators.download.BulkDownloaderOpDesc import edu.uci.ics.texera.workflow.operators.filter.SpecializedFilterOpDesc -import edu.uci.ics.texera.workflow.operators.hashJoin.{ - HashJoinOpDesc, - IncrementalJoinOpDesc -} +import edu.uci.ics.texera.workflow.operators.hashJoin.{HashJoinOpDesc, IncrementalJoinOpDesc} import edu.uci.ics.texera.workflow.operators.intersect.IntersectOpDesc import edu.uci.ics.texera.workflow.operators.intervalJoin.IntervalJoinOpDesc import edu.uci.ics.texera.workflow.operators.keywordSearch.KeywordSearchOpDesc diff --git 
a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala index 82e3b3c0045..f308c3e2b1b 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/IncrementalJoinOpDesc.scala @@ -16,7 +16,6 @@ import edu.uci.ics.texera.workflow.common.metadata.{ import edu.uci.ics.texera.workflow.common.operators.OperatorDescriptor import edu.uci.ics.texera.workflow.common.tuple.schema.{OperatorSchemaInfo, Schema} - @JsonSchemaInject(json = """ { "attributeTypeRules": { From 9296f7cbd70dbbc8a6b3dc8e7806aee9931853da Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Mon, 25 Sep 2023 23:55:39 -0700 Subject: [PATCH 15/18] add comments --- .../resource/SchemaPropagationResource.scala | 1 + .../aggregate/FinalAggregateOpExec.scala | 39 ++++++------ .../aggregate/PartialAggregateOpExec.scala | 59 +++++++++++-------- 3 files changed, 56 insertions(+), 43 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/SchemaPropagationResource.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/SchemaPropagationResource.scala index 95e8c53c74f..68360e1a413 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/SchemaPropagationResource.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/SchemaPropagationResource.scala @@ -40,6 +40,7 @@ class SchemaPropagationResource { val responseContent = schemaPropagationResult.map(e => (e._1.operator, e._2.map(s => s.map(o => o.getAttributesScala))) ) + // remove internal incremental computation columns responseContent.map(kv => { val schemaWithoutInternalAttrs = kv._2.map(portSchema => { portSchema.map(attrs => attrs.filter(attr => attr.getName.startsWith("__internal"))) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/FinalAggregateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/FinalAggregateOpExec.scala index 58b1edb8ded..e5d5d0e7850 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/FinalAggregateOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/FinalAggregateOpExec.scala @@ -25,10 +25,11 @@ class FinalAggregateOpExec( private var partialObjectsPerKey = Map[List[Object], List[Object]]() // for incremental computation + // the time interval that aggregate operator emits incremental update to downstream val UPDATE_INTERVAL_MS = 1000 + // the timestamp of the last incremental update private var lastUpdatedTime: Long = 0 - private var counterSinceLastUpdate: Long = 0 - + // the aggregation state at the last output, used to compute diff with the next output private var previousAggResults = Map[List[Object], List[Object]]() override def open(): Unit = {} @@ -45,40 +46,41 @@ class FinalAggregateOpExec( } tuple match { case Left(t) => - counterSinceLastUpdate += 1 - insertPartialInput(t) - - val condition: Boolean = System.currentTimeMillis - lastUpdatedTime > UPDATE_INTERVAL_MS - if (condition) - outputDiff() - else - Iterator() - + insertToFinalAggState(t) + if (shouldEmitOutput()) emitDiffAndUpdateState() else Iterator() case Right(_) => - outputDiff() + emitDiffAndUpdateState() } } - private def outputDiff(): Iterator[Tuple] = { - val resultIterator = 
calculateDiff() + private def shouldEmitOutput(): Boolean = { + System.currentTimeMillis - lastUpdatedTime > UPDATE_INTERVAL_MS + } - counterSinceLastUpdate = 0 + private def emitDiffAndUpdateState(): Iterator[Tuple] = { + val resultIterator = calculateDiff() + // reset last updated time and previous output results lastUpdatedTime = System.currentTimeMillis + // saves the current aggregation state, + // note that partialObjectsPerKey is an immutable map variable + // subsequent updates will change the map pointed by var, but not change the old map previousAggResults = partialObjectsPerKey resultIterator } private def calculateDiff(): Iterator[Tuple] = { - // find differences - + // find differences between the previous and the current aggregation state val retractions = new mutable.ArrayBuffer[Tuple]() val insertions = new mutable.ArrayBuffer[Tuple]() partialObjectsPerKey.keySet.foreach(k => { if (!previousAggResults.contains(k)) { + // this key doesn't exist in the previous state, emit as an insertion tuple val newFields = finalAggregate(k, partialObjectsPerKey(k)) insertions.append(addInsertionFlag(newFields, outputSchema)) } else if (previousAggResults(k) != partialObjectsPerKey(k)) { + // this key already exists in the state and its value has changed + // first retract the previously emitted value, then emit an insertion of the new value val prevFields = finalAggregate(k, previousAggResults(k)) retractions.append(addRetractionFlag(prevFields, outputSchema)) val newFields = finalAggregate(k, partialObjectsPerKey(k)) @@ -91,7 +93,8 @@ class FinalAggregateOpExec( results.iterator } - private def insertPartialInput(t: Tuple): Unit = { + // apply partial aggregation's incremental update to the final aggregation state + private def insertToFinalAggState(t: Tuple): Unit = { val key = if (groupByKeys == null || groupByKeys.isEmpty) List() else groupByKeys.map(k => t.getField[Object](k)) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/PartialAggregateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/PartialAggregateOpExec.scala index c2c3aa2498e..5fd15317d45 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/PartialAggregateOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/aggregate/PartialAggregateOpExec.scala @@ -40,7 +40,9 @@ class PartialAggregateOpExec( var partialObjectsPerKey = new mutable.HashMap[List[Object], List[Object]]() // for incremental computation + // the time interval that partial aggregate operator emits incremental update to final aggregate val UPDATE_INTERVAL_MS = 500 + // the timestamp of the last incremental update private var lastUpdatedTime: Long = 0 override def open(): Unit = {} @@ -57,35 +59,24 @@ class PartialAggregateOpExec( } tuple match { case Left(t) => - val key = - if (groupByKeys == null || groupByKeys.isEmpty) List() - else groupByKeys.map(k => t.getField[Object](k)) - - if (!partialObjectsPerKey.contains(key)) - partialObjectsPerKey.put(key, aggFuncs.map(aggFunc => aggFunc.init())) - - val partialObjects = - partialObjectsPerKey.getOrElseUpdate(key, aggFuncs.map(aggFunc => aggFunc.init())) - val updatedPartialObjects = aggFuncs.zip(partialObjects).map { - case (aggFunc, partial) => - aggFunc.iterate(partial, t) - } - partialObjectsPerKey.put(key, updatedPartialObjects) - - val condition = System.currentTimeMillis - lastUpdatedTime > UPDATE_INTERVAL_MS - if (condition) { - 
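// A minimal standalone sketch (illustrative names only, not the Texera API) of the diff
// logic that FinalAggregateOpExec.calculateDiff performs above: a group-by key absent from
// the previously emitted state yields an insertion; a key whose aggregate value changed
// yields a retraction of the old value followed by an insertion of the new one; unchanged
// keys produce nothing. Keys never disappear from the aggregation state, so deletions are
// not handled here either.
object AggStateDiffSketch {
  sealed trait Update
  final case class Insertion(key: List[Any], aggValues: List[Any]) extends Update
  final case class Retraction(key: List[Any], aggValues: List[Any]) extends Update

  def diff(
      previous: Map[List[Any], List[Any]],
      current: Map[List[Any], List[Any]]
  ): Seq[Update] =
    current.toSeq.flatMap {
      case (k, v) if !previous.contains(k) =>
        Seq(Insertion(k, v)) // new group-by key: emit as an insertion
      case (k, v) if previous(k) != v =>
        Seq(Retraction(k, previous(k)), Insertion(k, v)) // changed key: retract old, insert new
      case _ =>
        Seq.empty // unchanged key: nothing to emit
    }

  def main(args: Array[String]): Unit = {
    val prev: Map[List[Any], List[Any]] = Map(List("a") -> List(1), List("b") -> List(2))
    val curr: Map[List[Any], List[Any]] =
      Map(List("a") -> List(1), List("b") -> List(5), List("c") -> List(7))
    diff(prev, curr).foreach(println) // Retraction for b=2, then Insertions for b=5 and c=7
  }
}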
lastUpdatedTime = System.currentTimeMillis - val resultIterator = getPartialOutputs() - this.partialObjectsPerKey = new mutable.HashMap[List[Object], List[Object]]() - resultIterator - } else Iterator() + insertToPartialAggState(t) + if (shouldEmitOutput()) emitOutputAndResetState() else Iterator() case Right(_) => - val resultIterator = getPartialOutputs() - this.partialObjectsPerKey = new mutable.HashMap[List[Object], List[Object]]() - resultIterator + emitOutputAndResetState() } } + private def shouldEmitOutput(): Boolean = { + System.currentTimeMillis - lastUpdatedTime > UPDATE_INTERVAL_MS + } + + private def emitOutputAndResetState(): scala.Iterator[Tuple] = { + lastUpdatedTime = System.currentTimeMillis + val resultIterator = getPartialOutputs() + this.partialObjectsPerKey = new mutable.HashMap[List[Object], List[Object]]() + resultIterator + } + private def getPartialOutputs(): scala.Iterator[Tuple] = { partialObjectsPerKey.iterator.map(pair => { val tupleFields = pair._1 ++ pair._2 @@ -93,4 +84,22 @@ class PartialAggregateOpExec( }) } + private def insertToPartialAggState(t: Tuple): Unit = { + val key = + if (groupByKeys == null || groupByKeys.isEmpty) List() + else groupByKeys.map(k => t.getField[Object](k)) + + if (!partialObjectsPerKey.contains(key)) + partialObjectsPerKey.put(key, aggFuncs.map(aggFunc => aggFunc.init())) + + val partialObjects = + partialObjectsPerKey.getOrElseUpdate(key, aggFuncs.map(aggFunc => aggFunc.init())) + val updatedPartialObjects = aggFuncs.zip(partialObjects).map { + case (aggFunc, partial) => + aggFunc.iterate(partial, t) + } + partialObjectsPerKey.put(key, updatedPartialObjects) + + } + } From 1dc1246e86ed66562af9399e02a6da816cc4acf6 Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Fri, 29 Sep 2023 00:16:54 -0700 Subject: [PATCH 16/18] update --- .../ics/texera/web/resource/SchemaPropagationResource.scala | 6 +++--- .../texera/workflow/common/workflow/WorkflowCompiler.scala | 6 +++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/SchemaPropagationResource.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/SchemaPropagationResource.scala index 68360e1a413..f9524b2c95a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/SchemaPropagationResource.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/SchemaPropagationResource.scala @@ -41,13 +41,13 @@ class SchemaPropagationResource { (e._1.operator, e._2.map(s => s.map(o => o.getAttributesScala))) ) // remove internal incremental computation columns - responseContent.map(kv => { + val responseContentCleaned = responseContent.map(kv => { val schemaWithoutInternalAttrs = kv._2.map(portSchema => { - portSchema.map(attrs => attrs.filter(attr => attr.getName.startsWith("__internal"))) + portSchema.map(attrs => attrs.filter(attr => ! 
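// A hedged sketch (assumed names, not the actual operator classes) of the throttling
// pattern shared by PartialAggregateOpExec and FinalAggregateOpExec above: state is
// accumulated on every input tuple, and output is emitted only when UPDATE_INTERVAL_MS
// has elapsed since the last emission, or unconditionally once the input is exhausted.
final class IntervalThrottledEmitter[T](intervalMs: Long) {
  private val pending = scala.collection.mutable.ArrayBuffer.empty[T]
  private var lastEmitTime = 0L

  // called for every incoming element; returns the batch to emit (possibly empty)
  def onElement(t: T): Iterator[T] = {
    pending += t
    if (System.currentTimeMillis() - lastEmitTime > intervalMs) emit() else Iterator.empty
  }

  // called once upstream input is exhausted, mirroring the Right(_) branch in processTexeraTuple
  def onInputExhausted(): Iterator[T] = emit()

  private def emit(): Iterator[T] = {
    lastEmitTime = System.currentTimeMillis()
    val batch = pending.toList // copy before clearing so the returned iterator stays valid
    pending.clear()
    batch.iterator
  }
}
// usage sketch: val emitter = new IntervalThrottledEmitter[Int](500); emitter.onElement(1)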
attr.getName.startsWith("__internal"))) }) (kv._1, schemaWithoutInternalAttrs) }) - SchemaPropagationResponse(0, responseContent, null) + SchemaPropagationResponse(0, responseContentCleaned, null) } catch { case e: Throwable => e.printStackTrace() diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala index d010f65e46e..62de35cbf63 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/WorkflowCompiler.scala @@ -72,12 +72,16 @@ class WorkflowCompiler(val logicalPlan: LogicalPlan, val context: WorkflowContex opResultStorage: OpResultStorage, lastCompletedJob: Option[LogicalPlan] = Option.empty ): Workflow = { + // perform rewrite to reuse cache of previous runs val cacheReuses = new WorkflowCacheChecker(lastCompletedJob, logicalPlan).getValidCacheReuse() val opsToReuseCache = cacheReuses.intersect(logicalPlan.opsToReuseCache.toSet) - val rewrittenLogicalPlan = + var rewrittenLogicalPlan = WorkflowCacheRewriter.transform(logicalPlan, opResultStorage, opsToReuseCache) rewrittenLogicalPlan.operatorMap.values.foreach(initOperator) + // perform rewrite to enforce progressive computation constraints + rewrittenLogicalPlan = ProgressiveRetractionEnforcer.enforceDelta(rewrittenLogicalPlan, context) + // assign sink storage to the logical plan after cache rewrite // as it will be converted to the actual physical plan assignSinkStorage(rewrittenLogicalPlan, opResultStorage, opsToReuseCache) From 634d477c1766624c3f78a4d4d1b78bd544c816bf Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Fri, 29 Sep 2023 00:17:20 -0700 Subject: [PATCH 17/18] format --- .../uci/ics/texera/web/resource/SchemaPropagationResource.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/SchemaPropagationResource.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/SchemaPropagationResource.scala index f9524b2c95a..98cc2e96a1c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/SchemaPropagationResource.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/SchemaPropagationResource.scala @@ -43,7 +43,7 @@ class SchemaPropagationResource { // remove internal incremental computation columns val responseContentCleaned = responseContent.map(kv => { val schemaWithoutInternalAttrs = kv._2.map(portSchema => { - portSchema.map(attrs => attrs.filter(attr => ! 
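// A small sketch of the schema clean-up being corrected above: attributes whose names start
// with the "__internal" prefix (columns added only for incremental/retractable computation)
// are filtered out before schemas are returned to the frontend. SimpleAttribute and the
// "__internal_is_insertion" column name are stand-ins for illustration, not the real types.
object SchemaCleanupSketch {
  final case class SimpleAttribute(name: String, attributeType: String)

  def stripInternalColumns(schema: List[SimpleAttribute]): List[SimpleAttribute] =
    schema.filterNot(_.name.startsWith("__internal"))

  def main(args: Array[String]): Unit = {
    val schema = List(
      SimpleAttribute("count", "long"),
      SimpleAttribute("__internal_is_insertion", "boolean") // hypothetical internal column
    )
    println(stripInternalColumns(schema)) // keeps only the "count" column
  }
}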
attr.getName.startsWith("__internal"))) + portSchema.map(attrs => attrs.filter(attr => !attr.getName.startsWith("__internal"))) }) (kv._1, schemaWithoutInternalAttrs) }) From 50cd7514b4ddad65303649cb1808ac249327b289 Mon Sep 17 00:00:00 2001 From: Zuozhi Wang Date: Fri, 29 Sep 2023 00:20:41 -0700 Subject: [PATCH 18/18] remove unrelated change --- .../common/workflow/MaterializationRewriter.scala | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/MaterializationRewriter.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/MaterializationRewriter.scala index d198d8bac01..b450974fa01 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/MaterializationRewriter.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/workflow/MaterializationRewriter.scala @@ -2,9 +2,7 @@ package edu.uci.ics.texera.workflow.common.workflow import com.typesafe.scalalogging.LazyLogging import edu.uci.ics.amber.engine.common.virtualidentity.LinkIdentity -import edu.uci.ics.amber.engine.common.virtualidentity.util.SOURCE_STARTER_OP import edu.uci.ics.texera.workflow.common.WorkflowContext -import edu.uci.ics.texera.workflow.common.metadata.InputPort import edu.uci.ics.texera.workflow.common.operators.source.SourceOperatorDescriptor import edu.uci.ics.texera.workflow.common.storage.OpResultStorage import edu.uci.ics.texera.workflow.common.tuple.schema.{OperatorSchemaInfo, Schema} @@ -65,14 +63,10 @@ class MaterializationRewriter( materializationReader.setContext(context) materializationReader.schema = materializationWriter.getStorage.getSchema val matReaderOutputSchema = materializationReader.getOutputSchemas(Array()) - var matReaderOpExecConfig = + val matReaderOpExecConfig = materializationReader.operatorExecutor( OperatorSchemaInfo(Array(), matReaderOutputSchema) ) - matReaderOpExecConfig = matReaderOpExecConfig.copy( - inputPorts = List(InputPort()), - inputToOrdinalMapping = Map(LinkIdentity(SOURCE_STARTER_OP, matReaderOpExecConfig.id) -> 0) - ) newPlan = newPlan .addOperator(matWriterOpExecConfig)
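// A simplified sketch (assumed names; real rewriters operate on LogicalPlan, not on a
// Vector of strings) of how the WorkflowCompiler change above layers compile-time rewrites:
// the cache-reuse rewrite runs first, then ProgressiveRetractionEnforcer runs as a second
// pass before physical planning, i.e. plan-to-plan transformations applied in sequence.
object RewriteChainSketch {
  type Plan = Vector[String] // stand-in for LogicalPlan: just a list of operator names

  def applyRewrites(plan: Plan, rewrites: Seq[Plan => Plan]): Plan =
    rewrites.foldLeft(plan)((p, rewrite) => rewrite(p))

  def main(args: Array[String]): Unit = {
    val reuseCache: Plan => Plan = p => p // placeholder: would replace cached subgraphs
    val enforceRetraction: Plan => Plan = p => p :+ "Consolidate" // placeholder: would inject a consolidating op
    println(applyRewrites(Vector("Scan", "Aggregate", "Sink"), Seq(reuseCache, enforceRetraction)))
  }
}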