diff --git a/benchmarks/benchFourierLayer.jl b/benchmarks/benchFourierLayer.jl index 71e6e31..99ebf46 100644 --- a/benchmarks/benchFourierLayer.jl +++ b/benchmarks/benchFourierLayer.jl @@ -1,10 +1,10 @@ -# Stolen from Flux as well - -for n in [2, 20, 200, 2000] - x = randn(Float32, n, 2000, 100) - model = FourierLayer(n, n, 2000, 100, 16) - println("CPU n=$n") - run_benchmark(model, x, cuda=false) - println("CUDA n=$n") - run_benchmark(model, x, cuda=true) -end \ No newline at end of file +# Stolen from Flux as well + +for n in [2, 20, 200, 2000] + x = randn(Float32, n, 2000, 100) + model = FourierLayer(n, n, 2000, 100, 16) + println("CPU n=$n") + run_benchmark(model, x, cuda = false) + println("CUDA n=$n") + run_benchmark(model, x, cuda = true) +end diff --git a/benchmarks/bench_utils.jl b/benchmarks/bench_utils.jl index a20d019..1802802 100644 --- a/benchmarks/bench_utils.jl +++ b/benchmarks/bench_utils.jl @@ -1,51 +1,55 @@ -using BenchmarkTools -using Flux -using OperatorLearning -using CUDA -using Zygote: pullback - -# Stolen from Flux - -fw(m, x) = m(x) -bw(back) = back(1f0) -fwbw(m, ps, x) = gradient(() -> sum(m(x)), ps) - -function run_benchmark(model, x; cuda=true) - - if cuda - model = model |> gpu - x = x |> gpu - end - - ps = Flux.params(model) - y, back = pullback(() -> sum(model(x)), ps) - - - if cuda - CUDA.allowscalar(false) - # CUDA.device!(3) - println(" forward") - fw(model, x); GC.gc(); CUDA.reclaim(); #warmup - @btime CUDA.@sync(fw($model, $x)) teardown=(GC.gc(); CUDA.reclaim()) - - println(" backward") - bw(back); GC.gc(); CUDA.reclaim(); #warmup - @btime CUDA.@sync(bw($back)) teardown=(GC.gc(); CUDA.reclaim()) - - println(" forw and back") - fwbw(model, ps, x); GC.gc(); CUDA.reclaim(); #warmup - @btime CUDA.@sync(fwbw($model, $ps, $x)) teardown=(GC.gc(); CUDA.reclaim()) - else - println(" forward") - fw(model, x) #warmup - @btime fw($model, $x) - - println(" backward") - bw(back) #warmup - @btime bw($back) - - println(" forw and back") - fwbw(model, ps, x) # warmup - @btime fwbw($model, $ps, $x) - end -end \ No newline at end of file +using BenchmarkTools +using Flux +using OperatorLearning +using CUDA +using Zygote: pullback + +# Stolen from Flux + +fw(m, x) = m(x) +bw(back) = back(1.0f0) +fwbw(m, ps, x) = gradient(() -> sum(m(x)), ps) + +function run_benchmark(model, x; cuda = true) + if cuda + model = model |> gpu + x = x |> gpu + end + + ps = Flux.params(model) + y, back = pullback(() -> sum(model(x)), ps) + + if cuda + CUDA.allowscalar(false) + # CUDA.device!(3) + println(" forward") + fw(model, x); + GC.gc(); + CUDA.reclaim(); #warmup + @btime CUDA.@sync(fw($model, $x)) teardown=(GC.gc(); CUDA.reclaim()) + + println(" backward") + bw(back); + GC.gc(); + CUDA.reclaim(); #warmup + @btime CUDA.@sync(bw($back)) teardown=(GC.gc(); CUDA.reclaim()) + + println(" forw and back") + fwbw(model, ps, x); + GC.gc(); + CUDA.reclaim(); #warmup + @btime CUDA.@sync(fwbw($model, $ps, $x)) teardown=(GC.gc(); CUDA.reclaim()) + else + println(" forward") + fw(model, x) #warmup + @btime fw($model, $x) + + println(" backward") + bw(back) #warmup + @btime bw($back) + + println(" forw and back") + fwbw(model, ps, x) # warmup + @btime fwbw($model, $ps, $x) + end +end diff --git a/benchmarks/runbenchmarks.jl b/benchmarks/runbenchmarks.jl index 5458037..c37f3d3 100644 --- a/benchmarks/runbenchmarks.jl +++ b/benchmarks/runbenchmarks.jl @@ -1,7 +1,7 @@ -using OperatorLearning, Flux - -versioninfo() -include("bench_utils.jl") - -@info "Benchmark FourierLayer" -include("benchFourierLayer.jl") \ No newline at end of file +using OperatorLearning, Flux + +versioninfo() +include("bench_utils.jl") + +@info "Benchmark FourierLayer" +include("benchFourierLayer.jl") diff --git a/docs/make.jl b/docs/make.jl index 12ec39e..83b5bf8 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,29 +1,28 @@ using OperatorLearning using Documenter, DocumenterTools -DocMeta.setdocmeta!(OperatorLearning, :DocTestSetup, :(using OperatorLearning); recursive=true) +DocMeta.setdocmeta!(OperatorLearning, :DocTestSetup, :(using OperatorLearning); recursive = true) makedocs(; - modules=[OperatorLearning], - authors="Patrick Zimbrod and contributors", - repo="https://github.com/pzimbrod/OperatorLearning.jl/blob/{commit}{path}#{line}", - sitename="OperatorLearning.jl", - format=Documenter.HTML(; - prettyurls=get(ENV, "CI", "false") == "true", - canonical="https://pzimbrod.github.io/OperatorLearning.jl", - assets=String[], + modules = [OperatorLearning], + authors = "Patrick Zimbrod and contributors", + repo = "https://github.com/pzimbrod/OperatorLearning.jl/blob/{commit}{path}#{line}", + sitename = "OperatorLearning.jl", + format = Documenter.HTML(; + prettyurls = get(ENV, "CI", "false") == "true", + canonical = "https://pzimbrod.github.io/OperatorLearning.jl", + assets = String[] ), - pages=[ + pages = [ "Home" => "index.md", - "Examples" => - ["Burgers Equation with FNO" => "examples/burgers_FNO.md", - "Burgers Equation with DeepONet" => "examples/burgers_DeepONet.md"], + "Examples" => ["Burgers Equation with FNO" => "examples/burgers_FNO.md", + "Burgers Equation with DeepONet" => "examples/burgers_DeepONet.md"], "Frequently Asked Questions" => "faq.md", - "Module Reference" => "reference.md", - ], + "Module Reference" => "reference.md" + ] ) deploydocs(; - repo="github.com/pzimbrod/OperatorLearning.jl", - #devbranch="main", + repo = "github.com/pzimbrod/OperatorLearning.jl", +#devbranch="main", ) diff --git a/docs/src/reference.md b/docs/src/reference.md index cb13621..9b7c1f5 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -1,3 +1,3 @@ ```@autodocs Modules = [OperatorLearning] -``` \ No newline at end of file +``` diff --git a/examples/burgers_DeepONet.jl b/examples/burgers_DeepONet.jl index 8bc0ed5..cf8bcf8 100644 --- a/examples/burgers_DeepONet.jl +++ b/examples/burgers_DeepONet.jl @@ -21,22 +21,22 @@ subsample = 2^3; # create the x training array, according to our desired grid size xtrain = vars["a"][1:1000, 1:subsample:end]' |> device; # create the x test array -xtest = vars["a"][end-99:end, 1:subsample:end]' |> device; +xtest = vars["a"][(end - 99):end, 1:subsample:end]' |> device; # Create the y training array ytrain = vars["u"][1:1000, 1:subsample:end] |> device; # Create the y test array -ytest = vars["u"][end-99:end, 1:subsample:end] |> device; +ytest = vars["u"][(end - 99):end, 1:subsample:end] |> device; # The data is missing grid data, so we create it # `collect` converts data type `range` into an array -grid = collect(range(0, 1, length=1024))' |> device +grid = collect(range(0, 1, length = 1024))' |> device # Create the DeepONet: # IC is given on grid of 1024 points, and we solve for a fixed time t in one # spatial dimension x, making the branch input of size 1024 and trunk size 1 # We choose GeLU activation for both subnets -model = DeepONet((1024,1024,1024),(1,1024,1024),gelu,gelu) |> device +model = DeepONet((1024, 1024, 1024), (1, 1024, 1024), gelu, gelu) |> device # We use the ADAM optimizer for training learning_rate = 0.001 @@ -48,12 +48,12 @@ parameters = params(model) # The loss function # We can't use the "vanilla" implementation of the mse here since we have # two distinct inputs to our DeepONet, so we wrap them into a tuple -loss(xtrain,ytrain,sensor) = Flux.Losses.mse(model(xtrain,sensor),ytrain) +loss(xtrain, ytrain, sensor) = Flux.Losses.mse(model(xtrain, sensor), ytrain) # Define a callback function that gives some output during training -evalcb() = @show(loss(xtest,ytest,grid)) +evalcb() = @show(loss(xtest, ytest, grid)) # Print the callback only every 5 seconds throttled_cb = throttle(evalcb, 5) # Do the training loop -Flux.@epochs 500 train!(loss, parameters, [(xtrain,ytrain,grid)], opt, cb = evalcb) +Flux.@epochs 500 train!(loss, parameters, [(xtrain, ytrain, grid)], opt, cb = evalcb) diff --git a/examples/burgers_FNO.jl b/examples/burgers_FNO.jl index 896dda8..38bfdce 100644 --- a/examples/burgers_FNO.jl +++ b/examples/burgers_FNO.jl @@ -13,55 +13,57 @@ subsample = 2^3; # create the x training array, according to our desired grid size xtrain = vars["a"][1:1000, 1:subsample:end] |> device; # create the x test array -xtest = vars["a"][end-99:end, 1:subsample:end] |> device; +xtest = vars["a"][(end - 99):end, 1:subsample:end] |> device; # Create the y training array ytrain = vars["u"][1:1000, 1:subsample:end] |> device; # Create the y test array -ytest = vars["u"][end-99:end, 1:subsample:end] |> device; +ytest = vars["u"][(end - 99):end, 1:subsample:end] |> device; # The data is missing grid data, so we create it # `collect` converts data type `range` into an array -grid = collect(range(0, 1, length=length(xtrain[1,:]))) |> device +grid = collect(range(0, 1, length = length(xtrain[1, :]))) |> device # Merge the created grid with the data # Output has the dims: batch x grid points x 2 (a(x), x) # First, reshape the data to a 3D tensor, # Then, create a 3D tensor from the synthetic grid data # and concatenate them along the newly created 3rd dim -xtrain = cat(reshape(xtrain,(1000,1024,1)), - reshape(repeat(grid,1000),(1000,1024,1)); - dims=3) |> device -ytrain = cat(reshape(ytrain,(1000,1024,1)), - reshape(repeat(grid,1000),(1000,1024,1)); - dims=3) |> device +xtrain = cat(reshape(xtrain, (1000, 1024, 1)), + reshape(repeat(grid, 1000), (1000, 1024, 1)); + dims = 3) |> device +ytrain = cat(reshape(ytrain, (1000, 1024, 1)), + reshape(repeat(grid, 1000), (1000, 1024, 1)); + dims = 3) |> device # Same treatment with the test data -xtest = cat(reshape(xtest,(100,1024,1)), - reshape(repeat(grid,100),(100,1024,1)); - dims=3) |> device -ytest = cat(reshape(ytest,(100,1024,1)), - reshape(repeat(grid,100),(100,1024,1)); - dims=3) |> device +xtest = cat(reshape(xtest, (100, 1024, 1)), + reshape(repeat(grid, 100), (100, 1024, 1)); + dims = 3) |> device +ytest = cat(reshape(ytest, (100, 1024, 1)), + reshape(repeat(grid, 100), (100, 1024, 1)); + dims = 3) |> device # Our net wants the input in the form (2,grid,batch), though, # So we permute -xtrain, xtest = permutedims(xtrain,(3,2,1)), permutedims(xtest,(3,2,1)) |> device -ytrain, ytest = permutedims(ytrain,(3,2,1)), permutedims(ytest,(3,2,1)) |> device +xtrain, xtest = permutedims(xtrain, (3, 2, 1)), permutedims(xtest, (3, 2, 1)) |> device +ytrain, ytest = permutedims(ytrain, (3, 2, 1)), permutedims(ytest, (3, 2, 1)) |> device # Pass the data to the Flux DataLoader and give it a batch of 20 -train_loader = Flux.Data.DataLoader((xtrain, ytrain), batchsize=20, shuffle=true) |> device -test_loader = Flux.Data.DataLoader((xtest, ytest), batchsize=20, shuffle=false) |> device +train_loader = Flux.Data.DataLoader((xtrain, ytrain), batchsize = 20, shuffle = true) |> + device +test_loader = Flux.Data.DataLoader((xtest, ytest), batchsize = 20, shuffle = false) |> + device # Set up the Fourier Layer # 128 in- and outputs, batch size 20 as given above, grid size 1024 # 16 modes to keep, σ activation on the gpu -layer = FourierLayer(128,128,1024,16,gelu,bias_fourier=false) |> device +layer = FourierLayer(128, 128, 1024, 16, gelu, bias_fourier = false) |> device # The whole architecture # linear transform into the latent space, 4 Fourier Layers, # then transform it back -model = Chain(Dense(2,128;bias=false), layer, layer, layer, layer, - Dense(128,2;bias=false)) |> device +model = Chain(Dense(2, 128; bias = false), layer, layer, layer, layer, + Dense(128, 2; bias = false)) |> device # We use the ADAM optimizer for training learning_rate = 0.001 @@ -71,10 +73,10 @@ opt = ADAM(learning_rate) parameters = params(model) # The loss function -loss(x,y) = Flux.Losses.mse(model(x),y) +loss(x, y) = Flux.Losses.mse(model(x), y) # Define a callback function that gives some output during training -evalcb() = @show(loss(xtest,ytest)) +evalcb() = @show(loss(xtest, ytest)) # Print the callback only every 5 seconds, throttled_cb = throttle(evalcb, 5) @@ -82,10 +84,10 @@ throttled_cb = throttle(evalcb, 5) Flux.@epochs 500 train!(loss, parameters, train_loader, opt, cb = throttled_cb) # Accuracy metrics -val_loader = Flux.Data.DataLoader((xtest, ytest), batchsize=1, shuffle=false) |> device +val_loader = Flux.Data.DataLoader((xtest, ytest), batchsize = 1, shuffle = false) |> device loss = 0.0 |> device -for (x,y) in val_loader +for (x, y) in val_loader ŷ = model(x) - loss += Flux.Losses.mse(ŷ,y) + loss += Flux.Losses.mse(ŷ, y) end diff --git a/src/ComplexWeights.jl b/src/ComplexWeights.jl index e504668..f3c1b21 100644 --- a/src/ComplexWeights.jl +++ b/src/ComplexWeights.jl @@ -4,7 +4,9 @@ cglorot_uniform([rng=GLOBAL_RNG], dims...) A modification of the `glorot_uniform` function provided by `Flux` to accommodate Complex numbers. This is necessary since the parameters of the global convolution operator in the Fourier Layer generally has complex weights. """ -cglorot_uniform(rng::AbstractRNG, dims...) = (rand(rng, ComplexF32, dims...) .- 0.5f0) .* sqrt(24.0f0 / sum(nfan(dims...))) +function cglorot_uniform(rng::AbstractRNG, dims...) + (rand(rng, ComplexF32, dims...) .- 0.5f0) .* sqrt(24.0f0 / sum(nfan(dims...))) +end cglorot_uniform(dims...) = cglorot_uniform(Random.GLOBAL_RNG, dims...) cglorot_uniform(rng::AbstractRNG) = (dims...) -> cglorot_uniform(rng, dims...) @@ -14,6 +16,8 @@ cglorot_normal([rng=GLOBAL_RNG], dims...) A modification of the `glorot_normal` function provided by `Flux` to accommodate Complex numbers. This is necessary since the parameters of the global convolution operator in the Fourier Layer generally has complex weights. """ -cglorot_normal(rng::AbstractRNG, dims...) = randn(rng, ComplexF32, dims...) .* sqrt(2.0f0 / sum(nfan(dims...))) +function cglorot_normal(rng::AbstractRNG, dims...) + randn(rng, ComplexF32, dims...) .* sqrt(2.0f0 / sum(nfan(dims...))) +end cglorot_normal(dims...) = cglorot_normal(Random.GLOBAL_RNG, dims...) -cglorot_normal(rng::AbstractRNG) = (dims...) -> cglorot_normal(rng, dims...) \ No newline at end of file +cglorot_normal(rng::AbstractRNG) = (dims...) -> cglorot_normal(rng, dims...) diff --git a/src/DeepONet.jl b/src/DeepONet.jl index 2580bb3..9223bd5 100644 --- a/src/DeepONet.jl +++ b/src/DeepONet.jl @@ -77,20 +77,19 @@ end # Declare the function that assigns Weights and biases to the layer function DeepONet(architecture_branch::Tuple, architecture_trunk::Tuple, - act_branch = identity, act_trunk = identity; - init_branch = Flux.glorot_uniform, - init_trunk = Flux.glorot_uniform, - bias_branch=true, bias_trunk=true) - + act_branch = identity, act_trunk = identity; + init_branch = Flux.glorot_uniform, + init_trunk = Flux.glorot_uniform, + bias_branch = true, bias_trunk = true) @assert architecture_branch[end] == architecture_trunk[end] "Branch and Trunk net must share the same amount of nodes in the last layer. Otherwise Σᵢ bᵢⱼ tᵢₖ won't work." # To construct the subnets we use the helper function in subnets.jl # Initialize the branch net branch_net = construct_subnet(architecture_branch, act_branch; - init=init_branch, bias=bias_branch) + init = init_branch, bias = bias_branch) # Initialize the trunk net trunk_net = construct_subnet(architecture_trunk, act_trunk; - init=init_trunk, bias=bias_trunk) + init = init_trunk, bias = bias_trunk) return DeepONet(branch_net, trunk_net) end @@ -113,13 +112,14 @@ function (a::DeepONet)(x::AbstractArray, y::AbstractVecOrMat) end # Sensors stay the same and shouldn't be batched -(a::DeepONet)(x::AbstractArray, y::AbstractArray) = - throw(ArgumentError("Sensor locations fed to trunk net can't be batched.")) +function (a::DeepONet)(x::AbstractArray, y::AbstractArray) + throw(ArgumentError("Sensor locations fed to trunk net can't be batched.")) +end # Print nicely function Base.show(io::IO, l::DeepONet) - print(io, "DeepONet with\nbranch net: (",l.branch_net) + print(io, "DeepONet with\nbranch net: (", l.branch_net) print(io, ")\n") print(io, "Trunk net: (", l.trunk_net) print(io, ")\n") -end \ No newline at end of file +end diff --git a/src/FourierLayer.jl b/src/FourierLayer.jl index 43e6648..468c8e3 100644 --- a/src/FourierLayer.jl +++ b/src/FourierLayer.jl @@ -1,136 +1,139 @@ -""" -`FourierLayer(in, out, grid, modes, σ=identity, init=glorot_uniform)` -`FourierLayer(Wf::AbstractArray, Wl::AbstractArray, [bias_f, bias_l, σ])` - -Create a Layer of the Fourier Neural Operator as proposed by Li et al. -arXiv: 2010.08895 - -The layer does a fourier transform on the grid dimension of the input array, -filters higher modes out by the weight matrix and transforms it to the -specified output dimension such that In x M x N -> Out x M x N. -The output though only contains the relevant Fourier modes with the rest padded to zero -in the last axis as a result of the filtering. - -The input `x` should be a rank 3 tensor of shape -(num parameters (`in`) x num grid points (`grid`) x batch size (`batch`)) -The output `y` will be a rank 3 tensor of shape -(`out` x num grid points (`grid`) x batch size (`batch`)) - -You can specify biases for the paths as you like, though the convolutional path is -originally not intended to perform an affine transformation. - -# Examples -Say you're considering a 1D diffusion problem on a 64 point grid. The input is comprised -of the grid points as well as the IC at this point. -The data consists of 200 instances of the solution. -Beforehand we convert the two input channels into a higher-dimensional latent space with 128 nodes by using a regular `Dense` layer. -So the input takes the dimension `128 x 64 x 200`. -The output would be the diffused variable at a later time, which initially makes the output of the form `128 x 64 x 200` as well. Finally, we have to squeeze this high-dimensional ouptut into the one quantity of interest again by using a `Dense` layer. - -We wish to only keep the first 16 modes of the input and work with the classic sigmoid function as activation. - -So we would have: - -```julia -model = FourierLayer(128, 128, 100, 16, σ) -``` -""" -struct FourierLayer{F,Tc<:Complex{<:AbstractFloat},Tr<:AbstractFloat,Bf,Bl} - # F: Activation, Tc/Tr: Complex/Real eltype - Wf::AbstractArray{Tc,3} - Wl::AbstractMatrix{Tr} - grid::Int - σ::F - λ::Int - bf::Bf - bl::Bl - # Constructor for the entire fourier layer - function FourierLayer( - Wf::AbstractArray{Tc,3}, Wl::AbstractMatrix{Tr}, - grid::Int, σ::F = identity, - λ::Int = 12, bf = true, bl = true) where - {F,Tc<:Complex{<:AbstractFloat},Tr<:AbstractFloat} - - # create the biases with one singleton dimension - bf = Flux.create_bias(Wf, bf, 1, size(Wf,2), size(Wf,3)) - bl = Flux.create_bias(Wl, bl, 1, size(Wl,1), grid) - new{F,Tc,Tr,typeof(bf),typeof(bl)}(Wf, Wl, grid, σ, λ, bf, bl) - end -end - -# Declare the function that assigns Weights and biases to the layer -# `in` and `out` refer to the dimensionality of the number of parameters -# `modes` specifies the number of modes not to be filtered out -# `grid` specifies the number of grid points in the data -function FourierLayer(in::Integer, out::Integer, grid::Integer, modes = 12, - σ = identity; initf = cglorot_uniform, initl = Flux.glorot_uniform, - bias_fourier=true, bias_linear=true) - - # Initialize Fourier weight matrix (only with relevant modes) - Wf = initf(in, out, modes) - # Make sure filtering works - @assert modes <= floor(Int, grid/2 + 1) "Specified modes exceed allowed maximum. - The number of modes to filter must be smaller than N/2 + 1" - # Pad the fourier weight matrix with additional zeros - Wf = pad_zeros(Wf, (0, floor(Int, grid/2 + 1) - modes), dims=3) - - # Initialize Linear weight matrix - Wl = initl(out, in) - - # Pass the bias bools - bf = bias_fourier - bl = bias_linear - - # Pass the modes for output - λ = modes - - return FourierLayer(Wf, Wl, grid, σ, λ, bf, bl) -end - -# Only train the weight array with non-zero modes -Flux.@functor FourierLayer -Flux.trainable(a::FourierLayer) = (a.Wf[:,:,1:a.λ], a.Wl, - typeof(a.bf) != Flux.Zeros ? a.bf[:,:,1:a.λ] : nothing, - typeof(a.bl) != Flux.Zeros ? a.bl : nothing) - -# The actual layer that does stuff -function (a::FourierLayer)(x::AbstractArray) - # Assign the parameters - Wf, Wl, bf, bl, σ, = a.Wf, a.Wl, a.bf, a.bl, NNlib.fast_act(a.σ, x) - - # Do a permutation: DataLoader requires batch to be the last dim - # for the rest, it's more convenient to have it in the first one - xp = permutedims(x, [3,1,2]) - - # The linear path - # x -> Wl - @ein linear[batch, out, grid] := Wl[out, in] * xp[batch, in, grid] - linear .+ bl - - # The convolution path - # x -> 𝔉 -> Wf -> i𝔉 - # Do the Fourier transform (FFT) along the grid dimension of the input and - # Multiply the weight matrix with the input using batched multiplication - # We need to permute the input to (channel,batch,grid), otherwise batching won't work - @ein 𝔉[batch, out, grid] := Wf[in, out, grid] * rfft(xp, 3)[batch, in, grid] - 𝔉 .+ bf - - # Do the inverse transform - # We need to permute back to match the shape of the linear path - i𝔉 = irfft(𝔉, size(xp,3),3) - - # Return the activated sum - return permutedims(σ.(linear + i𝔉), [2,3,1]) -end - -# Print nicely -function Base.show(io::IO, l::FourierLayer) - print(io, "FourierLayer with\nConvolution path: (", size(l.Wf, 2), ", ", - size(l.Wf, 1), ", ", size(l.Wf, 3)) - print(io, ")\n") - print(io, "Linear path: (", size(l.Wl, 2), ", ", size(l.Wl, 1)) - print(io, ")\n") - print(io, "Fourier modes: ", l.λ) - print(io, "\n") - l.σ == identity || print(io, "Activation: ", l.σ) -end \ No newline at end of file +""" +`FourierLayer(in, out, grid, modes, σ=identity, init=glorot_uniform)` +`FourierLayer(Wf::AbstractArray, Wl::AbstractArray, [bias_f, bias_l, σ])` + +Create a Layer of the Fourier Neural Operator as proposed by Li et al. +arXiv: 2010.08895 + +The layer does a fourier transform on the grid dimension of the input array, +filters higher modes out by the weight matrix and transforms it to the +specified output dimension such that In x M x N -> Out x M x N. +The output though only contains the relevant Fourier modes with the rest padded to zero +in the last axis as a result of the filtering. + +The input `x` should be a rank 3 tensor of shape +(num parameters (`in`) x num grid points (`grid`) x batch size (`batch`)) +The output `y` will be a rank 3 tensor of shape +(`out` x num grid points (`grid`) x batch size (`batch`)) + +You can specify biases for the paths as you like, though the convolutional path is +originally not intended to perform an affine transformation. + +# Examples +Say you're considering a 1D diffusion problem on a 64 point grid. The input is comprised +of the grid points as well as the IC at this point. +The data consists of 200 instances of the solution. +Beforehand we convert the two input channels into a higher-dimensional latent space with 128 nodes by using a regular `Dense` layer. +So the input takes the dimension `128 x 64 x 200`. +The output would be the diffused variable at a later time, which initially makes the output of the form `128 x 64 x 200` as well. Finally, we have to squeeze this high-dimensional ouptut into the one quantity of interest again by using a `Dense` layer. + +We wish to only keep the first 16 modes of the input and work with the classic sigmoid function as activation. + +So we would have: + +```julia +model = FourierLayer(128, 128, 100, 16, σ) +``` +""" +struct FourierLayer{F, Tc <: Complex{<:AbstractFloat}, Tr <: AbstractFloat, Bf, Bl} + # F: Activation, Tc/Tr: Complex/Real eltype + Wf::AbstractArray{Tc, 3} + Wl::AbstractMatrix{Tr} + grid::Int + σ::F + λ::Int + bf::Bf + bl::Bl + # Constructor for the entire fourier layer + function FourierLayer( + Wf::AbstractArray{Tc, 3}, Wl::AbstractMatrix{Tr}, + grid::Int, σ::F = identity, + λ::Int = 12, bf = true, + bl = true) where + {F, Tc <: Complex{<:AbstractFloat}, Tr <: AbstractFloat} + + # create the biases with one singleton dimension + bf = Flux.create_bias(Wf, bf, 1, size(Wf, 2), size(Wf, 3)) + bl = Flux.create_bias(Wl, bl, 1, size(Wl, 1), grid) + new{F, Tc, Tr, typeof(bf), typeof(bl)}(Wf, Wl, grid, σ, λ, bf, bl) + end +end + +# Declare the function that assigns Weights and biases to the layer +# `in` and `out` refer to the dimensionality of the number of parameters +# `modes` specifies the number of modes not to be filtered out +# `grid` specifies the number of grid points in the data +function FourierLayer(in::Integer, out::Integer, grid::Integer, modes = 12, + σ = identity; initf = cglorot_uniform, initl = Flux.glorot_uniform, + bias_fourier = true, bias_linear = true) + + # Initialize Fourier weight matrix (only with relevant modes) + Wf = initf(in, out, modes) + # Make sure filtering works + @assert modes <= floor(Int, grid/2 + 1) "Specified modes exceed allowed maximum. + The number of modes to filter must be smaller than N/2 + 1" + # Pad the fourier weight matrix with additional zeros + Wf = pad_zeros(Wf, (0, floor(Int, grid/2 + 1) - modes), dims = 3) + + # Initialize Linear weight matrix + Wl = initl(out, in) + + # Pass the bias bools + bf = bias_fourier + bl = bias_linear + + # Pass the modes for output + λ = modes + + return FourierLayer(Wf, Wl, grid, σ, λ, bf, bl) +end + +# Only train the weight array with non-zero modes +Flux.@functor FourierLayer +function Flux.trainable(a::FourierLayer) + (a.Wf[:, :, 1:a.λ], a.Wl, + typeof(a.bf) != Flux.Zeros ? a.bf[:, :, 1:a.λ] : nothing, + typeof(a.bl) != Flux.Zeros ? a.bl : nothing) +end + +# The actual layer that does stuff +function (a::FourierLayer)(x::AbstractArray) + # Assign the parameters + Wf, Wl, bf, bl, σ, = a.Wf, a.Wl, a.bf, a.bl, NNlib.fast_act(a.σ, x) + + # Do a permutation: DataLoader requires batch to be the last dim + # for the rest, it's more convenient to have it in the first one + xp = permutedims(x, [3, 1, 2]) + + # The linear path + # x -> Wl + @ein linear[batch, out, grid] := Wl[out, in] * xp[batch, in, grid] + linear .+ bl + + # The convolution path + # x -> 𝔉 -> Wf -> i𝔉 + # Do the Fourier transform (FFT) along the grid dimension of the input and + # Multiply the weight matrix with the input using batched multiplication + # We need to permute the input to (channel,batch,grid), otherwise batching won't work + @ein 𝔉[batch, out, grid] := Wf[in, out, grid] * rfft(xp, 3)[batch, in, grid] + 𝔉 .+ bf + + # Do the inverse transform + # We need to permute back to match the shape of the linear path + i𝔉 = irfft(𝔉, size(xp, 3), 3) + + # Return the activated sum + return permutedims(σ.(linear + i𝔉), [2, 3, 1]) +end + +# Print nicely +function Base.show(io::IO, l::FourierLayer) + print(io, "FourierLayer with\nConvolution path: (", size(l.Wf, 2), ", ", + size(l.Wf, 1), ", ", size(l.Wf, 3)) + print(io, ")\n") + print(io, "Linear path: (", size(l.Wl, 2), ", ", size(l.Wl, 1)) + print(io, ")\n") + print(io, "Fourier modes: ", l.λ) + print(io, "\n") + l.σ == identity || print(io, "Activation: ", l.σ) +end diff --git a/src/OperatorLearning.jl b/src/OperatorLearning.jl index 5e52209..866a397 100644 --- a/src/OperatorLearning.jl +++ b/src/OperatorLearning.jl @@ -1,21 +1,21 @@ -module OperatorLearning - -using Base: Integer, ident_cmp, Float32 -using CUDA -using Flux -using FFTW -using FFTW: assert_applicable, unsafe_execute!, FORWARD, BACKWARD, rFFTWPlan -using Random -using Random: AbstractRNG -using Flux: nfan, glorot_uniform, batch -using OMEinsum - -export FourierLayer, DeepONet - -include("FourierLayer.jl") -include("DeepONet.jl") -include("ComplexWeights.jl") -include("batched.jl") -include("subnets.jl") - -end # module +module OperatorLearning + +using Base: Integer, ident_cmp, Float32 +using CUDA +using Flux +using FFTW +using FFTW: assert_applicable, unsafe_execute!, FORWARD, BACKWARD, rFFTWPlan +using Random +using Random: AbstractRNG +using Flux: nfan, glorot_uniform, batch +using OMEinsum + +export FourierLayer, DeepONet + +include("FourierLayer.jl") +include("DeepONet.jl") +include("ComplexWeights.jl") +include("batched.jl") +include("subnets.jl") + +end # module diff --git a/src/batched.jl b/src/batched.jl index 1aff86d..2696e4e 100644 --- a/src/batched.jl +++ b/src/batched.jl @@ -8,20 +8,20 @@ For `mul!`, `FFTW.jl` already provides an implementation. However, we need to lo """ # Extension for CPU Arrays -for (Tr,Tc) in ((:Float32,:(Complex{Float32})),(:Float64,:(Complex{Float64}))) +for (Tr, Tc) in ((:Float32, :(Complex{Float32})), (:Float64, :(Complex{Float64}))) # Note: use $FORWARD and $BACKWARD below because of issue #9775 @eval begin - function NNlib.batched_mul!(y::StridedArray{$Tc}, p::rFFTWPlan{$Tr,$FORWARD}, x::StridedArray{$Tr}) - assert_applicable(p, x[:,:,1], y[:,:,1]) # no need to check every batch dim - @inbounds for k ∈ 1:size(y,3) - @views unsafe_execute!(p, x[:,:,k], y[:,:,k]) + function NNlib.batched_mul!(y::StridedArray{$Tc}, p::rFFTWPlan{$Tr, $FORWARD}, x::StridedArray{$Tr}) + assert_applicable(p, x[:, :, 1], y[:, :, 1]) # no need to check every batch dim + @inbounds for k in 1:size(y, 3) + @views unsafe_execute!(p, x[:, :, k], y[:, :, k]) end return y end - function NNlib.batched_mul!(y::StridedArray{$Tr}, p::rFFTWPlan{$Tc,$BACKWARD}, x::StridedArray{$Tc}) - assert_applicable(p, x[:,:,1], y[:,:,1]) # no need to check every batch dim - @inbounds for k ∈ 1:size(y,3) - @views unsafe_execute!(p, x[:,:,k], y[:,:,k]) + function NNlib.batched_mul!(y::StridedArray{$Tr}, p::rFFTWPlan{$Tc, $BACKWARD}, x::StridedArray{$Tc}) + assert_applicable(p, x[:, :, 1], y[:, :, 1]) # no need to check every batch dim + @inbounds for k in 1:size(y, 3) + @views unsafe_execute!(p, x[:, :, k], y[:, :, k]) end return y end @@ -29,11 +29,11 @@ for (Tr,Tc) in ((:Float32,:(Complex{Float32})),(:Float64,:(Complex{Float64}))) end # Methods for GPU Arrays, borrowed from CUDA.jl -> fft.jl #490 -function NNlib.batched_mul!(y::DenseCuArray{Ty}, p::CuFFTPlan{T,K,false}, x::DenseCuArray{T} - ) where {Ty,T,K} - CUFFT.assert_applicable(p, x[:,:,1], y[:,:,1]) - @inbounds for k ∈ 1:size(y,3) - @views CUFFT.unsafe_execute!(p, x[:,:,k] ,y[:,:,k]) +function NNlib.batched_mul!(y::DenseCuArray{Ty}, p::CuFFTPlan{T, K, false}, x::DenseCuArray{T} +) where {Ty, T, K} + CUFFT.assert_applicable(p, x[:, :, 1], y[:, :, 1]) + @inbounds for k in 1:size(y, 3) + @views CUFFT.unsafe_execute!(p, x[:, :, k], y[:, :, k]) end return y end diff --git a/src/subnets.jl b/src/subnets.jl index 82af33b..93d271c 100644 --- a/src/subnets.jl +++ b/src/subnets.jl @@ -24,16 +24,16 @@ julia> model([2,1]) ``` """ function construct_subnet(architecture::Tuple, σ = identity; - init=Flux.glorot_uniform, bias=true) + init = Flux.glorot_uniform, bias = true) # First, create an array that contains all Dense layers independently # Given n-element architecture constructs n-1 layers layers = Array{Flux.Dense}(undef, length(architecture)-1) - @inbounds for i ∈ 2:length(architecture) - layers[i-1] = Flux.Dense(architecture[i-1], architecture[i], σ; - init=init, bias=bias) + @inbounds for i in 2:length(architecture) + layers[i - 1] = Flux.Dense(architecture[i - 1], architecture[i], σ; + init = init, bias = bias) end # Concatenate the layers to a string, chain them and parse them into # the Flux Chain constructor syntax - return Meta.parse("Chain("*join(layers,",")*")") |> eval -end \ No newline at end of file + return Meta.parse("Chain("*join(layers, ",")*")") |> eval +end diff --git a/test/complexweights.jl b/test/complexweights.jl index e274449..bfc3988 100644 --- a/test/complexweights.jl +++ b/test/complexweights.jl @@ -1,13 +1,13 @@ -using Test, Random, Flux - -@testset "Weights" begin - @testset "uniform" begin - @test size(OperatorLearning.cglorot_uniform(128,64,10)) == (128, 64, 10) - @test eltype(OperatorLearning.cglorot_uniform(128,64,10)) == ComplexF32 - end - - @testset "normal" begin - @test size(OperatorLearning.cglorot_normal(128,64,10)) == (128, 64, 10) - @test eltype(OperatorLearning.cglorot_normal(128,64,10)) == ComplexF32 - end -end \ No newline at end of file +using Test, Random, Flux + +@testset "Weights" begin + @testset "uniform" begin + @test size(OperatorLearning.cglorot_uniform(128, 64, 10)) == (128, 64, 10) + @test eltype(OperatorLearning.cglorot_uniform(128, 64, 10)) == ComplexF32 + end + + @testset "normal" begin + @test size(OperatorLearning.cglorot_normal(128, 64, 10)) == (128, 64, 10) + @test eltype(OperatorLearning.cglorot_normal(128, 64, 10)) == ComplexF32 + end +end diff --git a/test/deeponet.jl b/test/deeponet.jl index 0f3a482..22b9456 100644 --- a/test/deeponet.jl +++ b/test/deeponet.jl @@ -4,14 +4,18 @@ using Test, Random, Flux @testset "dimensions" begin # Test the proper construction # Branch net - @test size(DeepONet((32,64,72), (24,48,72), σ, tanh).branch_net.layers[end].weight) == (72,64) - @test size(DeepONet((32,64,72), (24,48,72), σ, tanh).branch_net.layers[end].bias) == (72,) + @test size(DeepONet((32, 64, 72), (24, 48, 72), σ, tanh).branch_net.layers[end].weight) == + (72, 64) + @test size(DeepONet((32, 64, 72), (24, 48, 72), σ, tanh).branch_net.layers[end].bias) == + (72,) # Trunk net - @test size(DeepONet((32,64,72), (24,48,72), σ, tanh).trunk_net.layers[end].weight) == (72,48) - @test size(DeepONet((32,64,72), (24,48,72), σ, tanh).trunk_net.layers[end].bias) == (72,) + @test size(DeepONet((32, 64, 72), (24, 48, 72), σ, tanh).trunk_net.layers[end].weight) == + (72, 48) + @test size(DeepONet((32, 64, 72), (24, 48, 72), σ, tanh).trunk_net.layers[end].bias) == + (72,) end # Accept only Int as architecture parameters - @test_throws MethodError DeepONet((32.5,64,72), (24,48,72), σ, tanh) - @test_throws MethodError DeepONet((32,64,72), (24.1,48,72)) -end \ No newline at end of file + @test_throws MethodError DeepONet((32.5, 64, 72), (24, 48, 72), σ, tanh) + @test_throws MethodError DeepONet((32, 64, 72), (24.1, 48, 72)) +end diff --git a/test/fourierlayer.jl b/test/fourierlayer.jl index a26af98..98bd987 100644 --- a/test/fourierlayer.jl +++ b/test/fourierlayer.jl @@ -1,31 +1,31 @@ -using Test, Random, Flux - -@testset "FourierLayer" begin - @testset "dimensions" begin - # Test the proper construction - @test size(FourierLayer(128, 64, 100, 20).Wf) == (128, 64, 51) - @test size(FourierLayer(128, 64, 100, 20).Wl) == (64, 128) - @test size(FourierLayer(128, 64, 100, 20).bf) == (1, 64, 51) - @test size(FourierLayer(128, 64, 100, 20).bl) == (1, 64, 100) - end - - # Test proper parameter assignment - # We only use a subset of the weight tensors for training - @testset "parameters" begin - # Wf - @test size(Flux.params(FourierLayer(128, 64, 100, 20))[1]) == (128, 64, 20) - # Wl - @test size(Flux.params(FourierLayer(128, 64, 100, 20))[2]) == (64, 128) - # bf - @test size(Flux.params(FourierLayer(128, 64, 100, 20))[3]) == (1, 64, 20) - # bl - @test size(Flux.params(FourierLayer(128, 64, 100, 20))[4]) == (1, 64, 100) - end - - # Accept only Int as architecture parameters - @test_throws MethodError FourierLayer(128.5, 64, 100, 20) - @test_throws MethodError FourierLayer(128.5, 64, 100, 20, tanh) - # Test max amount of modes - @test_throws AssertionError FourierLayer(100, 100, 100, 60, σ) - @test_throws AssertionError FourierLayer(100, 100, 100, 60) -end +using Test, Random, Flux + +@testset "FourierLayer" begin + @testset "dimensions" begin + # Test the proper construction + @test size(FourierLayer(128, 64, 100, 20).Wf) == (128, 64, 51) + @test size(FourierLayer(128, 64, 100, 20).Wl) == (64, 128) + @test size(FourierLayer(128, 64, 100, 20).bf) == (1, 64, 51) + @test size(FourierLayer(128, 64, 100, 20).bl) == (1, 64, 100) + end + + # Test proper parameter assignment + # We only use a subset of the weight tensors for training + @testset "parameters" begin + # Wf + @test size(Flux.params(FourierLayer(128, 64, 100, 20))[1]) == (128, 64, 20) + # Wl + @test size(Flux.params(FourierLayer(128, 64, 100, 20))[2]) == (64, 128) + # bf + @test size(Flux.params(FourierLayer(128, 64, 100, 20))[3]) == (1, 64, 20) + # bl + @test size(Flux.params(FourierLayer(128, 64, 100, 20))[4]) == (1, 64, 100) + end + + # Accept only Int as architecture parameters + @test_throws MethodError FourierLayer(128.5, 64, 100, 20) + @test_throws MethodError FourierLayer(128.5, 64, 100, 20, tanh) + # Test max amount of modes + @test_throws AssertionError FourierLayer(100, 100, 100, 60, σ) + @test_throws AssertionError FourierLayer(100, 100, 100, 60) +end diff --git a/test/runtests.jl b/test/runtests.jl index 0294fd7..8eebe90 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,27 +1,27 @@ -using Pkg -const LONGER_TESTS = false -const GROUP = get(ENV, "GROUP", "All") - -using OperatorLearning -using Test -using Random - -Random.seed!(0) - -if GROUP == "All" || GROUP == "Core" -@testset "FourierLayer" begin - include("fourierlayer.jl") -end - -@testset "DeepONet" begin - include("deeponet.jl") -end - -@testset "Weights" begin - include("complexweights.jl") -end -end - -if GROUP == "GPU" - # Add GPU Tests Here -end +using Pkg +const LONGER_TESTS = false +const GROUP = get(ENV, "GROUP", "All") + +using OperatorLearning +using Test +using Random + +Random.seed!(0) + +if GROUP == "All" || GROUP == "Core" + @testset "FourierLayer" begin + include("fourierlayer.jl") + end + + @testset "DeepONet" begin + include("deeponet.jl") + end + + @testset "Weights" begin + include("complexweights.jl") + end +end + +if GROUP == "GPU" + # Add GPU Tests Here +end