-
Notifications
You must be signed in to change notification settings - Fork 82
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1270 from RumbleDB/delta-lake-functions
Delta lake functions
- Loading branch information
Showing 13 changed files with 235 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
89 changes: 89 additions & 0 deletions
89
.../java/org/rumbledb/runtime/functions/delta_lake/CreateDeltaLakeTableFunctionIterator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
package org.rumbledb.runtime.functions.delta_lake; | ||
|
||
import org.apache.spark.sql.Dataset; | ||
import org.apache.spark.sql.RowFactory; | ||
import org.apache.spark.sql.types.DataTypes; | ||
import org.apache.spark.sql.types.StructType; | ||
import org.rumbledb.api.Item; | ||
import org.rumbledb.context.DynamicContext; | ||
import org.rumbledb.context.RuntimeStaticContext; | ||
import org.rumbledb.exceptions.CannotRetrieveResourceException; | ||
import org.rumbledb.items.BooleanItem; | ||
import org.rumbledb.runtime.AtMostOneItemLocalRuntimeIterator; | ||
import org.rumbledb.runtime.RuntimeIterator; | ||
import org.rumbledb.runtime.functions.input.FileSystemUtil; | ||
import org.apache.spark.sql.Row; | ||
import sparksoniq.spark.SparkSessionManager; | ||
|
||
import java.io.File; | ||
import java.net.URI; | ||
import java.util.List; | ||
|
||
import static org.apache.spark.sql.functions.lit; | ||
import static org.apache.spark.sql.functions.monotonically_increasing_id; | ||
|
||
public class CreateDeltaLakeTableFunctionIterator extends AtMostOneItemLocalRuntimeIterator { | ||
|
||
public CreateDeltaLakeTableFunctionIterator( | ||
List<RuntimeIterator> arguments, | ||
RuntimeStaticContext staticContext | ||
) { | ||
super(arguments, staticContext); | ||
} | ||
|
||
@Override | ||
public Item materializeFirstItemOrNull(DynamicContext context) { | ||
RuntimeIterator urlIterator = this.children.get(0); | ||
urlIterator.open(context); | ||
String url = urlIterator.next().getStringValue(); | ||
urlIterator.close(); | ||
URI uri = FileSystemUtil.resolveURI(this.staticURI, url, getMetadata()); | ||
if (FileSystemUtil.exists(uri, context.getRumbleRuntimeConfiguration(), getMetadata())) { | ||
throw new CannotRetrieveResourceException( | ||
"File " + uri + " already exists. Cannot create new delta lake table at this location.", | ||
getMetadata() | ||
); | ||
} | ||
try { | ||
File directory = new File(uri.getPath()); | ||
if (!directory.exists()) { | ||
boolean mkdirs = directory.mkdirs(); | ||
if (!mkdirs) { | ||
throw new RuntimeException("Failed to create directory " + directory); | ||
} | ||
} | ||
Dataset<Row> dataFrame = SparkSessionManager.getInstance() | ||
.getOrCreateSession() | ||
.emptyDataFrame(); | ||
dataFrame = dataFrame.withColumn(SparkSessionManager.mutabilityLevelColumnName, lit(0)); | ||
dataFrame = dataFrame.withColumn(SparkSessionManager.rowIdColumnName, monotonically_increasing_id()); | ||
dataFrame = dataFrame.withColumn(SparkSessionManager.pathInColumnName, lit("")); | ||
dataFrame = dataFrame.withColumn(SparkSessionManager.tableLocationColumnName, lit(uri.toString())); | ||
|
||
StructType schema = new StructType() | ||
.add(SparkSessionManager.mutabilityLevelColumnName, DataTypes.IntegerType, false) | ||
.add(SparkSessionManager.rowIdColumnName, DataTypes.IntegerType, false) | ||
.add(SparkSessionManager.pathInColumnName, DataTypes.StringType, false) | ||
.add(SparkSessionManager.tableLocationColumnName, DataTypes.StringType, false); | ||
|
||
Row newRow = RowFactory.create( | ||
0, | ||
0, | ||
"", | ||
uri.toString() | ||
); | ||
|
||
Dataset<Row> newRowDataFrame = SparkSessionManager.getInstance() | ||
.getOrCreateSession() | ||
.createDataFrame(List.of(newRow), schema); | ||
|
||
Dataset<Row> combinedDataFrame = dataFrame.union(newRowDataFrame); | ||
|
||
combinedDataFrame.write().format("delta").mode("error").save(uri.toString()); | ||
return new BooleanItem(true); | ||
} catch (RuntimeException e) { | ||
e.printStackTrace(); | ||
return new BooleanItem(false); | ||
} | ||
} | ||
} |
53 changes: 53 additions & 0 deletions
53
.../java/org/rumbledb/runtime/functions/delta_lake/DeleteDeltaLakeTableFunctionIterator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
package org.rumbledb.runtime.functions.delta_lake; | ||
|
||
import org.apache.commons.io.FileUtils; | ||
import org.rumbledb.api.Item; | ||
import org.rumbledb.context.DynamicContext; | ||
import org.rumbledb.context.RuntimeStaticContext; | ||
import org.rumbledb.exceptions.CannotRetrieveResourceException; | ||
import org.rumbledb.items.BooleanItem; | ||
import org.rumbledb.runtime.AtMostOneItemLocalRuntimeIterator; | ||
import org.rumbledb.runtime.RuntimeIterator; | ||
import org.rumbledb.runtime.functions.input.FileSystemUtil; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.net.URI; | ||
import java.util.List; | ||
|
||
public class DeleteDeltaLakeTableFunctionIterator extends AtMostOneItemLocalRuntimeIterator { | ||
|
||
public DeleteDeltaLakeTableFunctionIterator( | ||
List<RuntimeIterator> arguments, | ||
RuntimeStaticContext staticContext | ||
) { | ||
super(arguments, staticContext); | ||
} | ||
|
||
@Override | ||
public Item materializeFirstItemOrNull(DynamicContext context) { | ||
RuntimeIterator urlIterator = this.children.get(0); | ||
urlIterator.open(context); | ||
String url = urlIterator.next().getStringValue(); | ||
urlIterator.close(); | ||
URI uri = FileSystemUtil.resolveURI(this.staticURI, url, getMetadata()); | ||
if (!FileSystemUtil.exists(uri, context.getRumbleRuntimeConfiguration(), getMetadata())) { | ||
throw new CannotRetrieveResourceException("File " + uri + " not found.", getMetadata()); | ||
} | ||
|
||
// URI tableURI = FileSystemUtil.resolveURIAgainstWorkingDirectory( | ||
// this.currentAnnotation.getDeltaTablePath(), | ||
// DeltaLakeConfigurationCatalogue.defaultDeltaLakeConfiguration, | ||
// ExceptionMetadata.EMPTY_METADATA | ||
// ); | ||
|
||
try { | ||
File oldTable = new File(uri.getPath()); | ||
FileUtils.deleteDirectory(oldTable); | ||
return new BooleanItem(true); | ||
} catch (IOException e) { | ||
e.printStackTrace(); | ||
return new BooleanItem(false); | ||
} | ||
} | ||
} |
32 changes: 32 additions & 0 deletions
32
src/main/java/org/rumbledb/runtime/functions/delta_lake/DeltaLakeConfigurationCatalogue.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package org.rumbledb.runtime.functions.delta_lake; | ||
|
||
import org.rumbledb.config.RumbleRuntimeConfiguration; | ||
|
||
public class DeltaLakeConfigurationCatalogue { | ||
static final RumbleRuntimeConfiguration defaultDeltaLakeConfiguration = new RumbleRuntimeConfiguration( | ||
new String[] { | ||
"--print-iterator-tree", | ||
"yes", | ||
"--output-format", | ||
"delta", | ||
"--show-error-info", | ||
"yes", | ||
"--apply-updates", | ||
"yes", | ||
} | ||
); | ||
|
||
static final RumbleRuntimeConfiguration createDeltaLakeConfiguration = new RumbleRuntimeConfiguration( | ||
new String[] { | ||
"--print-iterator-tree", | ||
"yes", | ||
"--output-format", | ||
"delta", | ||
"--show-error-info", | ||
"yes", | ||
"--apply-updates", | ||
"yes", | ||
} | ||
); | ||
|
||
} |
4 changes: 4 additions & 0 deletions
4
...sources/test_files/runtime-delta-updates/delta-lake-functions/FunctionDeltaArrayDelete.jq
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
(:JIQS: ShouldRun; UpdateDim=[5,7]; Output="[ "SUCCESS" ]" :) | ||
(: Deletes the member at position 1 of new_array in the table read from ./tempDeltaTable, then re-reads the table and returns new_array. :)
(: NOTE(review): the expected output presumes table state left by other tests in this directory; UpdateDim looks like an ordering key -- confirm against the test harness. :)
let $data := delta-file("./tempDeltaTable") | ||
return delete json $data.new_array[[1]]; | ||
delta-file("./tempDeltaTable").new_array |
4 changes: 4 additions & 0 deletions
4
...sources/test_files/runtime-delta-updates/delta-lake-functions/FunctionDeltaArrayInsert.jq
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
(:JIQS: ShouldRun; UpdateDim=[5,5]; Output="SUCCESS" :) | ||
(: Inserts the string "SUCCESS" at position 1 of new_array in the table read from ./tempDeltaTable, then re-reads the table and returns the first member of new_array. :)
(: NOTE(review): presumes new_array already exists in the table, presumably created by another test in this directory -- confirm the run order. :)
let $data := delta-file("./tempDeltaTable") | ||
return insert json "SUCCESS" into $data.new_array at position 1; | ||
delta-file("./tempDeltaTable").new_array[[1]] |
4 changes: 4 additions & 0 deletions
4
...ources/test_files/runtime-delta-updates/delta-lake-functions/FunctionDeltaArrayReplace.jq
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
(:JIQS: ShouldRun; UpdateDim=[5,6]; Output="DOUBLE SUCCESS" :) | ||
(: Replaces the first member of new_array in the table read from ./tempDeltaTable with "DOUBLE SUCCESS", then re-reads the table and returns that member. :)
(: NOTE(review): presumes new_array already has a first member, presumably inserted by another test in this directory -- confirm the run order. :)
let $data := delta-file("./tempDeltaTable") | ||
return replace value of json $data.new_array[[1]] with "DOUBLE SUCCESS"; | ||
delta-file("./tempDeltaTable").new_array[[1]] |
4 changes: 4 additions & 0 deletions
4
...ources/test_files/runtime-delta-updates/delta-lake-functions/FunctionDeltaObjectDelete.jq
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
(:JIQS: ShouldRun; UpdateDim=[5,4]; Output="null" :) | ||
(: Deletes the "success" key from the table read from ./tempDeltaTable, then re-reads the table and looks the key up again. :)
(: NOTE(review): presumes a "success" key exists beforehand, presumably produced by the rename test in this directory -- confirm the run order. :)
let $data := delta-file("./tempDeltaTable") | ||
return delete json $data.success; | ||
delta-file("./tempDeltaTable").success |
4 changes: 4 additions & 0 deletions
4
...ources/test_files/runtime-delta-updates/delta-lake-functions/FunctionDeltaObjectInsert.jq
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
(:JIQS: ShouldRun; UpdateDim=[5,1]; Output="(SUCCESS, [ "SUCCESS" ])" :) | ||
(: Inserts two pairs into the table read from ./tempDeltaTable -- new_ins with the string "SUCCESS" and new_array with a one-member array -- then re-reads the table and returns both values. :)
(: NOTE(review): presumes ./tempDeltaTable was created beforehand, presumably by the create-table test in this directory -- confirm the run order. :)
let $data := delta-file("./tempDeltaTable") | ||
return (insert json "new_ins" : "SUCCESS" into $data, insert json "new_array" : ["SUCCESS"] into $data); | ||
(delta-file("./tempDeltaTable").new_ins, delta-file("./tempDeltaTable").new_array) |
4 changes: 4 additions & 0 deletions
4
...ources/test_files/runtime-delta-updates/delta-lake-functions/FunctionDeltaObjectRename.jq
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
(:JIQS: ShouldRun; UpdateDim=[5,3]; Output="DOUBLE SUCCESS" :) | ||
(: Renames the new_ins key to "success" in the table read from ./tempDeltaTable, then re-reads the table and returns the value stored under the new key. :)
(: NOTE(review): the expected value presumes new_ins was set to "DOUBLE SUCCESS" beforehand, presumably by the object-replace test in this directory -- confirm the run order. :)
let $data := delta-file("./tempDeltaTable") | ||
return rename json $data.new_ins as "success"; | ||
delta-file("./tempDeltaTable").success |
4 changes: 4 additions & 0 deletions
4
...urces/test_files/runtime-delta-updates/delta-lake-functions/FunctionDeltaObjectReplace.jq
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
(:JIQS: ShouldRun; UpdateDim=[5,2]; Output="DOUBLE SUCCESS" :) | ||
(: Replaces the value of new_ins in the table read from ./tempDeltaTable with "DOUBLE SUCCESS", then re-reads the table and returns new_ins. :)
(: NOTE(review): presumes new_ins already exists, presumably inserted by the object-insert test in this directory -- confirm the run order. :)
let $data := delta-file("./tempDeltaTable") | ||
return replace value of json $data.new_ins with "DOUBLE SUCCESS"; | ||
delta-file("./tempDeltaTable").new_ins |
3 changes: 3 additions & 0 deletions
3
...es/test_files/runtime-delta-updates/delta-lake-functions/FunctionsCreateDeltaLakeTable.jq
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
(:JIQS: ShouldRun; UpdateDim=[5,0]; Output="true" :) | ||
(: Calls create-delta-lake-table on ./tempDeltaTable and returns the function's result, which the annotation expects to be true. :)
let $ret := create-delta-lake-table("./tempDeltaTable") | ||
return $ret |
3 changes: 3 additions & 0 deletions
3
...es/test_files/runtime-delta-updates/delta-lake-functions/FunctionsDeleteDeltaLakeTable.jq
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
(:JIQS: ShouldRun; UpdateDim=[5,8]; Output="true" :) | ||
(: Calls delete-delta-lake-table on ./tempDeltaTable and returns the function's result, which the annotation expects to be true. :)
(: NOTE(review): presumes ./tempDeltaTable exists when this runs, presumably created by the create-table test in this directory -- confirm the run order. :)
let $ret := delete-delta-lake-table("./tempDeltaTable") | ||
return $ret |