Merge pull request #937 from SciML/ap/boltz_up

refactor: use layers from `Boltz`
SciML · Sep 13, 2024 · d2d2b47 · d2d2b47 · avik-pal · Sep 13, 2024
2 parents ca29849 + 4cb101c
commit d2d2b47
Show file tree

Hide file tree

Showing 23 changed files with 136 additions and 351 deletions.
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
@@ -5,6 +5,9 @@ steps:
           version: "1.10"
       - JuliaCI/julia-test#v1:
           coverage: true
+          dirs:
+            - src
+            - ext
     agents:
       queue: "juliagpu"
       cuda: "*"

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -47,6 +47,8 @@ jobs:
         env:
           GROUP: ${{ matrix.group }}
       - uses: julia-actions/julia-processcoverage@v1
+        with:
+          directories: src,ext
       - uses: codecov/codecov-action@v4
         with:
           file: lcov.info

diff --git a/Project.toml b/Project.toml
@@ -1,16 +1,14 @@
 name = "DiffEqFlux"
 uuid = "aae7a2af-3d4f-5e19-a356-7da93b79d9d0"
 authors = ["Chris Rackauckas <[email protected]>"]
-version = "3.5.2"
+version = "3.6.0"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
+Boltz = "4544d5e4-abc5-4dea-817f-29e4c205d9c8"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471"
-DataInterpolations = "82cc6244-b520-54b8-b5a6-8a565e85f1d0"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
-DistributionsAD = "ced4e74d-a319-5a8a-b0ac-84af2272839c"
-ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
@@ -20,22 +18,28 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
 SciMLSensitivity = "1ed8b502-d754-442c-8d5d-10ac956f44a1"
 Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
-Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+
+[weakdeps]
+DataInterpolations = "82cc6244-b520-54b8-b5a6-8a565e85f1d0"
+
+[extensions]
+DiffEqFluxDataInterpolationsExt = "DataInterpolations"
 
 [compat]
 ADTypes = "1.5"
 Aqua = "0.8.7"
 BenchmarkTools = "1.5.0"
+Boltz = "0.4.2"
 ChainRulesCore = "1"
 ComponentArrays = "0.15.17"
 ConcreteStructs = "0.2"
-DataInterpolations = "< 5.3"
+DataInterpolations = "5, 6"
 DelayDiffEq = "5.47.3"
 DiffEqCallbacks = "3.6.2"
 Distances = "0.10.11"
 Distributed = "1.10"
 Distributions = "0.25"
-DistributionsAD = "0.6"
+DistributionsAD = "0.6.55"
 ExplicitImports = "1.9"
 Flux = "0.14.15"
 ForwardDiff = "0.10"
@@ -71,12 +75,15 @@ julia = "1.10"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
+DataInterpolations = "82cc6244-b520-54b8-b5a6-8a565e85f1d0"
 DelayDiffEq = "bcd4f6db-9728-5f36-b5f7-82caef46ccdb"
 DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def"
 Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+DistributionsAD = "ced4e74d-a319-5a8a-b0ac-84af2272839c"
 ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Hwloc = "0e44f5e4-bd66-52a0-8798-143a42290a1d"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
@@ -95,6 +102,7 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StochasticDiffEq = "789caeaf-c7a9-5a7d-9973-96adeb23e2a0"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-test = ["Aqua", "BenchmarkTools", "ComponentArrays", "DelayDiffEq", "DiffEqCallbacks", "Distances", "Distributed", "ExplicitImports", "Flux", "Hwloc", "InteractiveUtils", "LuxCUDA", "MLDatasets", "NNlib", "OneHotArrays", "Optimisers", "Optimization", "OptimizationOptimJL", "OptimizationOptimisers", "OrdinaryDiffEq", "Printf", "Random", "ReTestItems", "Reexport", "Statistics", "StochasticDiffEq", "Test"]
+test = ["Aqua", "BenchmarkTools", "ComponentArrays", "DataInterpolations", "DelayDiffEq", "DiffEqCallbacks", "Distances", "Distributed", "DistributionsAD", "ExplicitImports", "ForwardDiff", "Flux", "Hwloc", "InteractiveUtils", "LuxCUDA", "MLDatasets", "NNlib", "OneHotArrays", "Optimisers", "Optimization", "OptimizationOptimJL", "OptimizationOptimisers", "OrdinaryDiffEq", "Printf", "Random", "ReTestItems", "Reexport", "Statistics", "StochasticDiffEq", "Test", "Zygote"]
diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@ by helping users put diffeq solvers into neural networks. This package utilizes
 [Scientific Machine Learning](https://www.stochasticlifestyle.com/the-essential-tools-of-scientific-machine-learning-scientific-ml/), specifically neural differential equations to add physical information into traditional machine learning.
 
 > [!NOTE]
-> We maintain backwards compatibility with [Flux.jl](https://docs.sciml.ai/Flux/stable/) via [FromFluxAdaptor()](https://lux.csail.mit.edu/stable/api/Lux/interop#Lux.FromFluxAdaptor)
+> We maintain backwards compatibility with [Flux.jl](https://fluxml.ai/Flux.jl/stable/) via [FromFluxAdaptor()](https://lux.csail.mit.edu/stable/api/Lux/interop#Lux.FromFluxAdaptor)
 
 ## Tutorials and Documentation
 
@@ -61,7 +61,17 @@ explore various ways to integrate the two methodologies:
 
 ![Flux ODE Training Animation](https://user-images.githubusercontent.com/1814174/88589293-e8207f80-d026-11ea-86e2-8a3feb8252ca.gif)
 
-## Breaking Changes in v3
+## Breaking Changes
+
+### v4 (upcoming)
+
+  - `TensorLayer` has been removed, use `Boltz.Layers.TensorProductLayer` instead.
+  - Basis functions in DiffEqFlux have been removed in favor of `Boltz.Basis` module.
+  - `SplineLayer` has been removed, use `Boltz.Layers.SplineLayer` instead.
+  - `NeuralHamiltonianDE` has been removed, use `NeuralODE` with `Layers.HamiltonianNN` instead.
+  - `HamiltonianNN` has been removed in favor of `Layers.HamiltonianNN`.
+
+### v3
 
   - Flux dependency is dropped. If a non Lux `AbstractExplicitLayer` is passed we try to automatically convert it to a Lux model with `FromFluxAdaptor()(model)`.
   - `Flux` is no longer re-exported from `DiffEqFlux`. Instead we reexport `Lux`.

diff --git a/docs/pages.jl b/docs/pages.jl
@@ -18,12 +18,8 @@ pages = [
         "examples/physical_constraints.md",
     ],
     "Layer APIs" => Any[
-        "Classical Basis Layers" => "layers/BasisLayers.md",
-        "Tensor Product Layer" => "layers/TensorLayer.md",
         "Continuous Normalizing Flows Layer" => "layers/CNFLayer.md",
-        "Spline Layer" => "layers/SplineLayer.md",
         "Neural Differential Equation Layers" => "layers/NeuralDELayers.md",
-        "Hamiltonian Neural Network Layer" => "layers/HamiltonianNN.md"
     ],
     "Utility Function APIs" => Any[
         "Smoothed Collocation" => "utilities/Collocation.md",

diff --git a/docs/src/examples/mnist_conv_neural_ode.md b/docs/src/examples/mnist_conv_neural_ode.md
@@ -22,7 +22,7 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true
 
 function loadmnist(batchsize)
     # Load MNIST
-    dataset = MNIST(; split = :train)
+    dataset = MNIST(; split = :train)[1:2000] # Partial load for demonstration
     imgs = dataset.features
     labels_raw = dataset.targets
 
@@ -114,6 +114,5 @@ end
 # Train the NN-ODE and monitor the loss and weights.
 res = Optimization.solve(opt_prob, opt, dataloader; maxiters = 5, callback)
 acc = accuracy(m, dataloader, res.u, st)
-@assert acc > 0.8 # hide
 acc # hide
 ```
diff --git a/docs/src/examples/mnist_neural_ode.md b/docs/src/examples/mnist_neural_ode.md
@@ -20,7 +20,7 @@ logitcrossentropy = CrossEntropyLoss(; logits = Val(true))
 
 function loadmnist(batchsize)
     # Load MNIST
-    dataset = MNIST(; split = :train)
+    dataset = MNIST(; split = :train)[1:2000] # Partial load for demonstration
     imgs = dataset.features
     labels_raw = dataset.targets
 
@@ -104,7 +104,7 @@ end
 
 # Train the NN-ODE and monitor the loss and weights.
 res = Optimization.solve(opt_prob, opt, dataloader; callback, maxiters = 5)
-@assert accuracy(m, dataloader, res.u, st) > 0.8
+accuracy(m, dataloader, res.u, st)
 ```
 
 ## Step-by-Step Description
@@ -151,7 +151,7 @@ logitcrossentropy = CrossEntropyLoss(; logits = Val(true))
 
 function loadmnist(batchsize)
     # Load MNIST
-    dataset = MNIST(; split = :train)
+    dataset = MNIST(; split = :train)[1:2000] # Partial load for demonstration
     imgs = dataset.features
     labels_raw = dataset.targets
 
@@ -221,6 +221,12 @@ st = st |> gdev;
 ```
 
 ```@example mnist
+# We can also build the model topology without a NN-ODE
+m_no_ode = Chain(; down, nn, fc)
+ps_no_ode, st_no_ode = Lux.setup(Xoshiro(0), m_no_ode);
+ps_no_ode = ComponentArray(ps_no_ode) |> gdev;
+st_no_ode = st_no_ode |> gdev;
+
 x_train1, y_train1 = first(dataloader)
 
 # To understand the intermediate NN-ODE layer, we can examine its dimensionality
@@ -324,7 +330,5 @@ for Neural ODE is given by `nn_ode.p`:
 ```@example mnist
 # Train the NN-ODE and monitor the loss and weights.
 res = Optimization.solve(opt_prob, opt, dataloader; callback, maxiters = 5)
-acc = accuracy(m, dataloader, res.u, st)
-@assert acc > 0.8 # hide
-acc # hide
+accuracy(m, dataloader, res.u, st)
 ```
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -18,7 +18,7 @@ and helper functions to make training such deep implicit layer models fast and e
 The approach of this package is the easy and efficient training of
 [Neural Ordinary Differential Equations](https://arxiv.org/abs/1806.07366) and its variants.
 DiffEqFlux.jl provides architectures which match the interfaces of
-machine learning libraries such as [Flux.jl](https://docs.sciml.ai/Flux/stable/)
+machine learning libraries such as [Flux.jl](https://fluxml.ai/Flux.jl/stable/)
 and [Lux.jl](https://lux.csail.mit.edu/stable/)
 to make it easy to build continuous-time machine learning layers
 into larger machine learning applications.

diff --git a/docs/src/layers/BasisLayers.md b/docs/src/layers/BasisLayers.md
diff --git a/docs/src/layers/HamiltonianNN.md b/docs/src/layers/HamiltonianNN.md
diff --git a/docs/src/layers/SplineLayer.md b/docs/src/layers/SplineLayer.md
diff --git a/docs/src/layers/TensorLayer.md b/docs/src/layers/TensorLayer.md
diff --git a/ext/DiffEqFluxDataInterpolationsExt.jl b/ext/DiffEqFluxDataInterpolationsExt.jl
@@ -0,0 +1,19 @@
+module DiffEqFluxDataInterpolationsExt
+
+using DataInterpolations: DataInterpolations
+using DiffEqFlux: DiffEqFlux
+
+# Interpolation-based method of `DiffEqFlux.collocate_data`.
+#
+# Every row of `data` is fitted with `interp(row, tpoints, args...)`. The fitted curve and
+# its derivative (via `DataInterpolations.derivative`) are then evaluated at each entry of
+# `tpoints_sample`. Returns the tuple `(du, u)`; both matrices have size
+# `(size(data, 1), length(tpoints_sample))`.
+@views function DiffEqFlux.collocate_data(
+        data::AbstractMatrix{T}, tpoints::AbstractVector{T},
+        tpoints_sample::AbstractVector{T}, interp, args...) where {T}
+    nrows, nsamples = size(data, 1), length(tpoints_sample)
+    du = zeros(T, nrows, nsamples)
+    u = zeros(T, nrows, nsamples)
+    for row in axes(data, 1)
+        itp = interp(data[row, :], tpoints, args...)
+        u[row, :] .= itp.(tpoints_sample)
+        # The 1-tuple keeps the interpolation object from being broadcast over.
+        du[row, :] .= DataInterpolations.derivative.((itp,), tpoints_sample)
+    end
+    return du, u
+end
+
+end
diff --git a/src/DiffEqFlux.jl b/src/DiffEqFlux.jl
@@ -3,10 +3,7 @@ module DiffEqFlux
 using ADTypes: ADTypes, AutoForwardDiff, AutoZygote
 using ChainRulesCore: ChainRulesCore
 using ConcreteStructs: @concrete
-using DataInterpolations: DataInterpolations
 using Distributions: Distributions, ContinuousMultivariateDistribution, Distribution, logpdf
-using DistributionsAD: DistributionsAD
-using ForwardDiff: ForwardDiff
 using LinearAlgebra: LinearAlgebra, Diagonal, det, tr, mul!
 using Lux: Lux, Chain, Dense, StatefulLuxLayer, FromFluxAdaptor
 using LuxCore: LuxCore, AbstractExplicitLayer, AbstractExplicitContainerLayer
@@ -23,26 +20,25 @@ using SciMLSensitivity: SciMLSensitivity, AdjointLSS, BacksolveAdjoint, EnzymeVJ
                         SteadyStateAdjoint, TrackerAdjoint, TrackerVJP, ZygoteAdjoint,
                         ZygoteVJP
 using Setfield: @set!
-using Zygote: Zygote
 
 const CRC = ChainRulesCore
 
-@reexport using ADTypes, Lux
+@reexport using ADTypes, Lux, Boltz
+
+# Flag used as the type parameter of `StatefulLuxLayer`; any layer defaults to `true`.
+fixed_state_type(::Any) = true
+# `Layers.HamiltonianNN` carries the flag as its own type parameter, so return that.
+fixed_state_type(::Layers.HamiltonianNN{FST}) where {FST} = FST
 
 include("ffjord.jl")
 include("neural_de.jl")
-include("spline_layer.jl")
-include("tensor_product.jl")
+
 include("collocation.jl")
-include("hnn.jl")
 include("multiple_shooting.jl")
 
+include("deprecated.jl")
+
 export NeuralODE, NeuralDSDE, NeuralSDE, NeuralCDDE, NeuralDAE, AugmentedNDELayer,
-       NeuralODEMM, TensorLayer, SplineLayer
-export NeuralHamiltonianDE, HamiltonianNN
+       NeuralODEMM
 export FFJORD, FFJORDDistribution
-export TensorProductBasisFunction, ChebyshevBasis, SinBasis, CosBasis, FourierBasis,
-       LegendreBasis, PolynomialBasis
 export DimMover
 
 export EpanechnikovKernel, UniformKernel, TriangularKernel, QuarticKernel, TriweightKernel,

diff --git a/src/collocation.jl b/src/collocation.jl
@@ -106,15 +106,3 @@ end
     du, u = collocate_data(reshape(data, 1, :), tpoints, tpoints_sample, interp, args...)
     return du[1, :], u[1, :]
 end
-
-@views function collocate_data(data::AbstractMatrix{T}, tpoints::AbstractVector{T},
-        tpoints_sample::AbstractVector{T}, interp, args...) where {T}
-    u = zeros(T, size(data, 1), length(tpoints_sample))
-    du = zeros(T, size(data, 1), length(tpoints_sample))
-    for d1 in axes(data, 1)
-        interpolation = interp(data[d1, :], tpoints, args...)
-        u[d1, :] .= interpolation.(tpoints_sample)
-        du[d1, :] .= DataInterpolations.derivative.((interpolation,), tpoints_sample)
-    end
-    return du, u
-end
diff --git a/src/deprecated.jl b/src/deprecated.jl
@@ -0,0 +1,47 @@
+# Tensor Layer
+# The old basis/tensor-product names forward to their `Boltz` replacements.
+Base.@deprecate TensorProductBasisFunction(f, n) Basis.GeneralBasisFunction{:none}(f, n, 1)
+
+# `XBasis(n)` becomes `Basis.X(n)` for each of the classical bases listed here.
+for new_name in (:Chebyshev, :Sin, :Cos, :Fourier, :Legendre, :Polynomial)
+    old_name = Symbol(new_name, :Basis)
+    @eval Base.@deprecate $(old_name)(n) Basis.$(new_name)(n)
+end
+
+# The legacy positional `init_p` is passed on as the `init_weight` keyword.
+Base.@deprecate(
+    TensorLayer(model, out_dim::Int, init_p::F = randn) where {F <: Function},
+    Boltz.Layers.TensorProductLayer(model, out_dim; init_weight = init_p))
+
+# Spline Layer
+# Deprecated shim: warns, then builds a `Layers.SplineLayer` from the legacy arguments.
+#   - `tspan`: its `first` and `last` entries become the grid bounds.
+#   - `tstep`, `spline_basis`: passed through unchanged.
+#   - `init_saved_points`: `nothing`, or a callable taking `(rng, (lo, hi), step)`.
+function SplineLayer(tspan, tstep, spline_basis; init_saved_points::F = nothing) where {F}
+    Base.depwarn(
+        "SplineLayer is deprecated and will be removed in the next major release. Refer to \
+         Boltz.jl `Layers.SplineLayer` for the newer version.",
+        :SplineLayer)
+
+    # Wrap a legacy initializer in a five-argument callable; the second argument is
+    # ignored and the grid bounds are re-packed into the tuple the legacy form expects.
+    adapted_init = if init_saved_points === nothing
+        nothing
+    else
+        let legacy_init = init_saved_points
+            (rng, _, lo, hi, dt) -> legacy_init(rng, (lo, hi), dt)
+        end
+    end
+
+    t_start, t_stop = first(tspan), last(tspan)
+    return Layers.SplineLayer(
+        (), t_start, t_stop, tstep, spline_basis; init_saved_points = adapted_init)
+end
+
+export SplineLayer
+
+# Hamiltonian Neural Network
+Base.@deprecate HamiltonianNN(model; ad = AutoZygote()) Layers.HamiltonianNN{true}(
+    model; autodiff = ad)
+
+# Deprecated constructor kept for backwards compatibility: emits a deprecation warning
+# and returns a `NeuralODE` built around a `Layers.HamiltonianNN`.
+#   - `model`: an existing `Layers.HamiltonianNN` is used as is (`ad` is then unused);
+#     anything else is wrapped through the deprecated `HamiltonianNN(model; ad)`.
+#   - `tspan`: forwarded to `NeuralODE`.
+#   - `args...` / `kwargs...`: extra positional and keyword arguments for `NeuralODE`.
+function NeuralHamiltonianDE(model, tspan, args...; ad = AutoForwardDiff(), kwargs...)
+    Base.depwarn(
+        "NeuralHamiltonianDE is deprecated, use `NeuralODE` with `Layers.HamiltonianNN` instead.",
+        :NeuralHamiltonianDE)
+    hnn = model isa Layers.HamiltonianNN ? model : HamiltonianNN(model; ad)
+    # Splat both collections: passing `args, kwargs` unsplatted would hand `NeuralODE`
+    # one tuple and one kwargs collection as two positional arguments.
+    return NeuralODE(hnn, tspan, args...; kwargs...)
+end
+
+export NeuralHamiltonianDE
diff --git a/src/ffjord.jl b/src/ffjord.jl
@@ -132,7 +132,7 @@ function __forward_ffjord(n::FFJORD, x::AbstractArray{T, N}, ps, st) where {T, N
     (; regularize, monte_carlo) = st
     sensealg = InterpolatingAdjoint(; autojacvec = ZygoteVJP())
 
-    model = StatefulLuxLayer{true}(n.model, nothing, st.model)
+    model = StatefulLuxLayer{fixed_state_type(n.model)}(n.model, nothing, st.model)
 
     ffjord(u, p, t) = __ffjord(model, u, p, n.ad, regularize, monte_carlo)