From c76bbde2797b0419c031e0e2e3c8c46dba75d379 Mon Sep 17 00:00:00 2001 From: Jan Pfeifer Date: Mon, 7 Oct 2024 12:09:16 +0200 Subject: [PATCH] Fixed variable declaration with tuple returning functions #131. Added unit tests and functional tests in `nbtests` package. --- cmd/nbexec/nbexec.go | 3 +- docs/CHANGELOG.md | 4 + examples/tests/vartuple.ipynb | 133 +++++++++++++++++++++++++++++++ internal/goexec/composer.go | 18 ++++- internal/goexec/goexec.go | 35 +++++++- internal/goexec/parser.go | 26 +++++- internal/goexec/parser_test.go | 9 ++- internal/nbtests/nbtests.go | 2 +- internal/nbtests/nbtests_test.go | 41 ++++++++++ 9 files changed, 262 insertions(+), 9 deletions(-) create mode 100644 examples/tests/vartuple.ipynb diff --git a/cmd/nbexec/nbexec.go b/cmd/nbexec/nbexec.go index 59965af..a1b86d3 100644 --- a/cmd/nbexec/nbexec.go +++ b/cmd/nbexec/nbexec.go @@ -89,8 +89,7 @@ func main() { // Start Jupyter Notebook startJupyterNotebook() if jupyterDone == nil || jupyterDone.Test() { - klog.Errorf("The program `jupyter notebook` failed to run, notebook was not executed!") - return + klog.Fatal("The program `jupyter notebook` failed to run, notebook was not executed!") } go func() { diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index b01e792..a0df29f 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -1,5 +1,9 @@ # GoNB Changelog +## Next + +* Issue #131: proper handling of tuple variable declarations like `var contents, _ = os.ReadFile(...)` + ## 0.10.3, Simple go1.23 update. * go1.23, and update dependencies. diff --git a/examples/tests/vartuple.ipynb b/examples/tests/vartuple.ipynb new file mode 100644 index 0000000..8a94e10 --- /dev/null +++ b/examples/tests/vartuple.ipynb @@ -0,0 +1,133 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "8278e544-c48b-4f34-a151-9b58ac31ae19", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a=3, c=7\n" + ] + } + ], + "source": [ + "func tupleFn() (x, y, z int) {\n", + " return 3, 5, 7\n", + "}\n", + "\n", + "var a, _, c = tupleFn()\n", + "\n", + "%%\n", + "fmt.Printf(\"a=%d, c=%d\\n\", a, c)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "621d6604-e016-40a9-9ccd-734747e70187", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Memorized Definitions

\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "

Variables

\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "

Functions

\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%ls" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "bfdcc7f3-a282-4db9-a0dd-e8e58362fd9e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "c=1.5\n" + ] + } + ], + "source": [ + "var c = float32(1.5)\n", + "\n", + "%%\n", + "fmt.Printf(\"c=%g\\n\", c)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9ba8abf4-e15c-488b-a55d-740aaf24e1b9", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ". key \"a\" not found in any definition, not removed\n" + ] + } + ], + "source": [ + "// Definition of `a` must have disappeared, when c was redefined.\n", + "%rm a" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Go (gonb)", + "language": "go", + "name": "gonb" + }, + "language_info": { + "codemirror_mode": "", + "file_extension": ".go", + "mimetype": "", + "name": "go", + "nbconvert_exporter": "", + "pygments_lexer": "", + "version": "go1.23.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/internal/goexec/composer.go b/internal/goexec/composer.go index d50db38..e636fcf 100644 --- a/internal/goexec/composer.go +++ b/internal/goexec/composer.go @@ -20,7 +20,7 @@ type CellIdAndLine struct { Id, Line int } -// MakeFileToCellIdAndLine converts a cellId slice of cell line numbers for a file to a slice of CellIdAndLine. +// MakeFileToCellIdAndLine converts a cellId and a slice of cell line numbers for a file to a slice of CellIdAndLine. func MakeFileToCellIdAndLine(cellId int, fileToCellLine []int) (fileToCellIdAndLine []CellIdAndLine) { fileToCellIdAndLine = make([]CellIdAndLine, len(fileToCellLine)) for ii, line := range fileToCellLine { @@ -146,6 +146,11 @@ func (d *Declarations) RenderVariables(w *WriterWithCursor, fileToCellIdAndLine w.Write("var (\n") for _, key := range SortedKeys(d.Variables) { varDecl := d.Variables[key] + isTuple := len(varDecl.TupleDefinitions) > 0 + if isTuple && varDecl.TupleDefinitions[0] != varDecl { + // We only render the first variable of the tuple. + continue + } w.Write("\t") fileToCellIdAndLine = w.FillLinesGap(fileToCellIdAndLine) fileToCellIdAndLine = varDecl.CellLines.Append(fileToCellIdAndLine) @@ -153,6 +158,17 @@ func (d *Declarations) RenderVariables(w *WriterWithCursor, fileToCellIdAndLine cursor = w.CursorPlusDelta(varDecl.Cursor) } w.Write(varDecl.Name) + + if isTuple { + for _, otherVar := range varDecl.TupleDefinitions[1:] { + w.Write(", ") + if otherVar.CursorInName { + cursor = w.CursorPlusDelta(otherVar.Cursor) + } + w.Write(otherVar.Name) + } + } + if varDecl.TypeDefinition != "" { w.Write(" ") if varDecl.CursorInType { diff --git a/internal/goexec/goexec.go b/internal/goexec/goexec.go index ff6472d..a5dda94 100644 --- a/internal/goexec/goexec.go +++ b/internal/goexec/goexec.go @@ -17,6 +17,7 @@ import ( "os/exec" "path" "regexp" + "slices" ) const ( @@ -269,7 +270,7 @@ func (d *Declarations) Copy() *Declarations { func (d *Declarations) MergeFrom(d2 *Declarations) { copyMap(d.Imports, d2.Imports) copyMap(d.Functions, d2.Functions) - copyMap(d.Variables, d2.Variables) + variablesCopyFrom(d.Variables, d2.Variables) copyMap(d.Types, d2.Types) copyMap(d.Constants, d2.Constants) } @@ -280,6 +281,23 @@ func copyMap[K comparable, V any](dst, src map[K]V) { } } +// variablesCopyFrom is similar to copyMap above, but it handles the special tuple cases. +func variablesCopyFrom(dst, src map[string]*Variable) { + for k, v := range src { + if oldV, found := dst[k]; found { + // If there is a previous variable with the same name, tied to a tuple that is not the same + // tuple that is being merged, then remove all the definitions of the previous tuple. + if oldV.TupleDefinitions != nil && !slices.Equal(oldV.TupleDefinitions, v.TupleDefinitions) { + for _, removeV := range oldV.TupleDefinitions { + // One of the removeV will be equal to oldV, which is fine. + delete(dst, removeV.Key) + } + } + } + dst[k] = v + } +} + // ClearCursor wherever declaration it may be. func (d *Declarations) ClearCursor() { clearCursor(d.Imports) @@ -387,6 +405,18 @@ type Function struct { } // Variable definition, parsed from a notebook cell. +// +// There is one special case, where one variable entry will define multiple variables, when we have line like: +// +// var a, b, c = someFuncThatReturnsATuple() +// +// For such cases, one set TupleDefinitions in order ("a", "b", "c"). And we define the following behavior: +// +// - Only the first element of TupleDefinitions is rendered, but it renders the tuple definition. +// - If any of the elements of the tuple is redefined or removed, all elements are removed. +// +// This means that if "a" is redefined, "b" and "c" disappear. And that if "b" or "c" are redefined, it will +// yield and error, that is subtle to track. type Variable struct { Cursor CellLines @@ -394,6 +424,9 @@ type Variable struct { CursorInName, CursorInType, CursorInValue bool Key, Name string TypeDefinition, ValueDefinition string // Type definition may be empty. + + // TupleDefinitions are present when multiple variables are tied to the same definition as in `var a, b, c = someFunc()`. + TupleDefinitions []*Variable } // TypeDecl definition, parsed from a notebook cell. diff --git a/internal/goexec/parser.go b/internal/goexec/parser.go index 2dd650d..ff1e01d 100644 --- a/internal/goexec/parser.go +++ b/internal/goexec/parser.go @@ -10,7 +10,6 @@ import ( "math/rand" "os" "regexp" - "strconv" "strings" . "github.com/janpfeifer/gonb/common" @@ -261,6 +260,18 @@ func (pi *parseInfo) ParseFuncEntry(decls *Declarations, funcDecl *ast.FuncDecl) } // ParseVarEntry registers a new `var` declaration based on the ast.GenDecl. See State.parseFromGoCode +// +// There are a many variations to consider: +// +// (1) var v Type -> One variable, one type +// (2) var v1, v2 Type -> N variables, 1 type +// (3) var v1, v2 = 0, 1 -> N variables, N values +// (4) var v1, v2 int = 0, 1 -> N variables, 1 type, N values +// (5) var c, _ = os.ReadFile(...) -> N variables, one function that returns tuple. +// +// Case (5) is the most complicated, because the N variables are tied to one definition. +// We key it on the first variable, but this may cause issues with some code like `var a, b = someFunc()` +// and later the user decides to redefine `var b = 10`. It will require manual removal of the definition of `a`. func (pi *parseInfo) ParseVarEntry(decls *Declarations, genDecl *ast.GenDecl) { // Multiple declarations in the same line may share the cursor (e.g: `var a, b int` if the cursor // is in the `int` token). Only the first definition ('a' in the example) takes the cursor. @@ -274,9 +285,20 @@ func (pi *parseInfo) ParseVarEntry(decls *Declarations, genDecl *ast.GenDecl) { typeDefinition = pi.extractContentOfNode(vType) cursorInType = pi.getCursor(vType) } + + isTuple := len(vSpec.Names) > 0 && len(vSpec.Values) == 1 + var tupleDefinitions []*Variable + if isTuple { + tupleDefinitions = make([]*Variable, len(vSpec.Names)) + } + // Each spec may be a list of variables (comma separated). for nameIdx, name := range vSpec.Names { v := &Variable{Name: name.Name, TypeDefinition: typeDefinition} + if isTuple { + v.TupleDefinitions = tupleDefinitions + tupleDefinitions[nameIdx] = v + } if !cursorFound { if c := pi.getCursor(name); c.HasCursor() { v.CursorInName = true @@ -304,7 +326,7 @@ func (pi *parseInfo) ParseVarEntry(decls *Declarations, genDecl *ast.GenDecl) { v.CellLines = pi.calculateCellLines(vSpec) if v.Name == "_" { // Each un-named reference has a unique key. - v.Key = "_~" + strconv.Itoa(rand.Int()%0xFFFF) + v.Key = "_~" + fmt.Sprintf("%06d", rand.Int63n(1_000_000)) } decls.Variables[v.Key] = v } diff --git a/internal/goexec/parser_test.go b/internal/goexec/parser_test.go index 6b18f10..ce0166a 100644 --- a/internal/goexec/parser_test.go +++ b/internal/goexec/parser_test.go @@ -117,6 +117,8 @@ type Model[T any] struct { !echo nonono +var contents, _ = os.ReadFile("/tmp/a") + %% fmt.Printf("Hello! %s\n", c) fmt.Printf("1 + 3 = %d\n", sum(1, 3)) @@ -159,16 +161,17 @@ func TestState_Parse(t *testing.T) { assert.Contains(t, s.Definitions.Functions, "Kg~Gain") assert.Contains(t, s.Definitions.Functions, "N~Weight") assert.Contains(t, s.Definitions.Functions, "main") - assert.ElementsMatch(t, []int{71, 71, 72, 73, 74, 75, -1, -1}, s.Definitions.Functions["main"].CellLines.Lines, + assert.ElementsMatch(t, []int{73, 73, 74, 75, 76, 77, -1, -1}, s.Definitions.Functions["main"].CellLines.Lines, "Index to line numbers in original cell don't match.") fmt.Printf("\ttest variables: %+v\n", s.Definitions.Variables) - assert.Lenf(t, s.Definitions.Variables, 6, "Expected 4 variables, got %+v", s.Definitions.Variables) + assert.Lenf(t, s.Definitions.Variables, 8, "Expected 6 variables, got %d: %+v", len(s.Definitions.Variables), s.Definitions.Variables) assert.Contains(t, s.Definitions.Variables, "x") assert.Contains(t, s.Definitions.Variables, "y") assert.Contains(t, s.Definitions.Variables, "z") assert.Contains(t, s.Definitions.Variables, "b") assert.Contains(t, s.Definitions.Variables, "c") + assert.Contains(t, s.Definitions.Variables, "contents") // The 5th var is "_", which gets a random key. assert.ElementsMatch(t, []int{21, 22}, s.Definitions.Variables["b"].CellLines.Lines, "Index to line numbers in original cell don't match.") @@ -230,6 +233,7 @@ func TestState_Parse(t *testing.T) { b = math.Sqrt(30.0 + 34.0) c = "blah, blah, blah" + contents, _ = os.ReadFile("/tmp/a") x float32 = 1 y float32 = 2 z float64 @@ -251,6 +255,7 @@ func TestState_Parse(t *testing.T) { {cellId, 20}, {cellId, 20}, {cellId, 25}, + {cellId, 71}, // var contents, _ = os.ReadFile("/tmp/a") }, fileToCellIdAndLine, "Line numbers in cell code don't match") // Checks functions rendering. diff --git a/internal/nbtests/nbtests.go b/internal/nbtests/nbtests.go index 13c0755..6116e30 100644 --- a/internal/nbtests/nbtests.go +++ b/internal/nbtests/nbtests.go @@ -32,7 +32,7 @@ func GoNBRootDir() string { // variable JUPYTER_DATA_DIR to that directory, and compile and install // GoNB to that directory, so it will be used by `nbconvert`. // -// It also compiles `nbexec` to the same directory, which is used to controle the +// It also compiles `nbexec` to the same directory, which is used to control the // execution of the notebooks. // // Parameters: diff --git a/internal/nbtests/nbtests_test.go b/internal/nbtests/nbtests_test.go index 9da088f..4735aa8 100644 --- a/internal/nbtests/nbtests_test.go +++ b/internal/nbtests/nbtests_test.go @@ -671,3 +671,44 @@ func TestGonbui(t *testing.T) { require.NoError(t, os.Remove(f.Name())) clearNotebook(t, notebook) } + +func TestVarTuple(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration (nbconvert) test for short tests.") + return + } + f := executeNotebook(t, "vartuple") + err := Check(f, + Sequence( + Match(OutputLine(1), + Separator, + "a=3, c=7", + Separator), + + Match(OutputLine(2), + Separator), + // Here the Markdown output is all scrambled, so I tried to match some individual lines -- there is + // garbage (from the conversion to ASCII) in-between. + Match("== Variables"), + Match("a"), + Match("c"), + Match("== Functions"), + + Match(OutputLine(3), + Separator, + "c=1.5", + Separator), + + // Match failure of doing `%rm a`, since `a` definition must have been removed. + Match(OutputLine(4), + Separator, + `. key "a" not found in any definition, not removed`, + Separator), + ), + *flagPrintNotebook) + + require.NoError(t, err) + require.NoError(t, f.Close()) + require.NoError(t, os.Remove(f.Name())) + clearNotebook(t, "vartuple") +}