3

I need to speed up this recursive system. Can anyone help?

I annotated all variables and only used Float64-consistent functions. I also tried using memoization (since the system is recursive) via Memoize.jl, but I have to compute U, V and Z thousands of times in an optimization algorithm and my computer quickly runs out of memory.

const Tage = 60.0
const initT = 1975.0
const Ttime = 1990.0
const K = 6.0
const β = 0.95
const s = 0.5

θ0 = [
    0.0011
 -0.0045
 -0.0075
 -0.0013
  3.60   
  0.2587
 -0.0026  
 -0.0528
  0.0060
  1.3772
  0.2932
 -0.0030
  0.0021
  0.0044
 10.2593    
  0.7498
  1.1765
]./10000

function wagem(t::Float64, agem::Float64, edum::Float64, θ::Vector{Float64})
    θ[5] + θ[6]*agem + θ[7]*agem^2 + θ[8]*edum + θ[9]*edum^2
end

function wagef(t::Float64, agef::Float64, eduf::Float64, θ::Vector{Float64})
    θ[10] + θ[11]*agef + θ[12]*agef^2 + θ[13]*eduf + θ[14]*eduf^2
end

function ζ(agem::Float64, edum::Float64, agef::Float64, eduf::Float64, θ::Vector{Float64})
    θ[1]*agem*agef + θ[2]*agem*eduf + θ[3]*edum*agef + θ[4]*edum*eduf
end

function z(t::Float64, agem::Float64, edum::Float64, agef::Float64, eduf::Float64, θ::Vector{Float64})
    ζ(agem, edum, agef, eduf, θ) + wagem(t, agem, edum, θ) + wagef(t, agef, eduf, θ)
end

function dc(θ::Vector{Float64})
    ret::Float64 = 0.0
    ret = θ[15]
    return ret
end

function Z(t::Float64, agem::Float64, edum::Float64, agef::Float64, eduf::Float64, θ::Vector{Float64})
    ret::Float64 = 0.0
    if agem >= Tage || agef >= Tage # terminal age conditions
        ret = 0.0
    elseif t >= Ttime # terminal time condition
        ret = 1.5
    else
        ret = log(
        exp(z(t, agem, edum, agef, eduf, θ) + β*Z(t+1, agem+1, edum, agef+1, eduf, θ))
        + exp(-dc(θ) + β*(V(t+1, agem+1, edum, θ) + U(t+1, agef+1, eduf, θ)))
        )
    end
    return ret
end

function V(t::Float64, agem::Float64, edum::Float64, θ::Vector{Float64})
    ret::Float64 = 0.0
    if agem >= Tage
        ret = 0.0
    elseif t >= Ttime
        ret = 1.5
    else
        suma::Float64 = 0.0
        for agef in 16.0:Tage-1, eduf in 1.0:K
            suma += exp(s*z(t, agem, edum, agef, eduf, θ) + wagem(t, agem, edum, θ) + β*(s*(Z(t+1, agem+1, edum, agef+1, eduf, θ) - V(t+1, agem+1, edum, θ) - U(t+1, agef+1, eduf, θ)) + V(t+1, agem+1, edum, θ)))
        end
        ret = log(
                exp(wagem(t, agem, edum, θ) + β*(V(t+1, agem+1, edum, θ))) + suma
                )
    end
    return ret
end

function U(t::Float64, agef::Float64, eduf::Float64, θ::Vector{Float64})
    ret::Float64 = 0.0
    if agef >= Tage
        ret = 0.0
    elseif t >= Ttime
        ret = 1.5
    else
        suma::Float64 = 0.0
        for agem in 16.0:Tage-1, edum in 1.0:K
            suma += exp((1-s)*z(t, agem, edum, agef, eduf, θ) + wagef(t, agef, eduf, θ) + β*((1-s)*(Z(t+1, agem+1, edum, agef+1, eduf, θ) - V(t+1, agem+1, edum, θ) - U(t+1, agef+1, eduf, θ)) + U(t+1, agef+1, eduf, θ)))
        end
        ret = log(
        exp(wagef(t, agef, eduf, θ) + β*(U(t+1, agef+1, eduf, θ))) + suma
        )
    end
    return ret
end

I want to be able to compute things like U(1984.0, 17.0, 3.0, θ0) or V(1975.0, 16.0, 1.0, θ0) very quickly.

Any help would be appreciated.

amrods
  • 2,029
  • 1
  • 17
  • 28

1 Answers1

6

I think the code is like a monster!
You already have 3 recursive functions U(), V(), Z() after that, U() calls V() and vise versa, then V() calls Z() that itself calls U() ......
try rewrite it and prevent recursive calls as much as possible, this will ends to a more efficient solution. Check This-> recursion versus iteration
after that you have unnecessary repetitive calls to U() and V() inside for loop that must be done outside.

FIRST STEP: temp variable

function U(t::Float64, agef::Float64, eduf::Float64, θ::Vector{Float64}) 
    ret::Float64 = 0.0
    if agef >= Tage
        ret = 0.0
    elseif t >= Ttime
        ret = 1.5
    else
        suma::Float64 = 0.0
        U1 = U(t+1, agef+1, eduf, θ) # nothing to do with following loop, so I take it outside
        wagef1 = wagef(t, agef, eduf, θ) # same as above comment
        for agem in 16.0:Tage-1, edum in 1.0:K
            suma += exp((1-s)*z(t, agem, edum, agef, eduf, θ) + wagef1 + β*((1-s)*(Z(t+1, agem+1, edum, agef+1, eduf, θ, ui) - V(t+1, agem+1, edum, θ) - U1) + U1))
        end
        ret = log(
                  exp(wagef1 + β*(U1)) + suma
                  )
    end

    return ret
end

julia> @time U(1984.0, 17.0, 3.0, t0) # a test with const Tage = 19.0
  0.869675 seconds (102.77 k allocations: 2.148 MB, 0.57% gc time)
3.785563216949393

the above edits in both U() and V() makes my run 28 times faster compare to original code.

SECOND STEP: using right types

next thing is mall-usage of Float64 data type, IMO only θ,β,s need to be of Float64 type, and all the other variables should declare as integers.

julia> @time U(1984, 17, 3, t0) # a test with const Tage = 19
  0.608982 seconds (125.77 k allocations: 2.530 MB, 0.89% gc time)
3.785563216949393

now its 30% faster. compare to previous code

THIRD STEP Memoize.jl

although already some improvements have been happened for bigger problems they are not enough, but with using the right data types, in the previous step now `Memoize.jl` will be usable:  


using Memoize
const Tage = 60%Int
const initT = 1975%Int
const Ttime = 1990%Int
const K = 6%Int
const β = 0.95
const s = 0.5

t0 = [
    0.0011
 -0.0045
 -0.0075
 -0.0013
  3.60   
  0.2587
 -0.0026  
 -0.0528
  0.0060
  1.3772
  0.2932
 -0.0030
  0.0021
  0.0044
 10.2593    
  0.7498
  1.1765
]./10000

function wagem(t::Int, agem::Int, edum::Int, θ::Vector{Float64})
    θ[5] + θ[6]*agem + θ[7]*agem^2 + θ[8]*edum + θ[9]*edum^2
end

function wagef(t::Int, agef::Int, eduf::Int, θ::Vector{Float64})
    θ[10] + θ[11]*agef + θ[12]*agef^2 + θ[13]*eduf + θ[14]*eduf^2
end

function ζ(agem::Int, edum::Int, agef::Int, eduf::Int, θ::Vector{Float64})
    θ[1]*agem*agef + θ[2]*agem*eduf + θ[3]*edum*agef + θ[4]*edum*eduf
end

function z(t::Int, agem::Int, edum::Int, agef::Int, eduf::Int, θ::Vector{Float64})
    ζ(agem, edum, agef, eduf, θ) + wagem(t, agem, edum, θ) + wagef(t, agef, eduf, θ)
end

function dc(θ::Vector{Float64})
    ret::Float64 = 0.0
    ret = θ[15]
    return ret
end

function Z(t::Int, agem::Int, edum::Int, agef::Int, eduf::Int, θ::Vector{Float64})
    ret::Float64 = 0.0
    if agem >= Tage || agef >= Tage # terminal age conditions
        ret = 0.0
    elseif t >= Ttime # terminal time condition
        ret = 1.5
    else
        ret = log(
        exp(z(t, agem, edum, agef, eduf, θ) + β*Z(t+1, agem+1, edum, agef+1, eduf, θ))
        + exp(-dc(θ) + β*(V(t+1, agem+1, edum, θ) + U(t+1, agef+1, eduf, θ)))
        )
    end
    return ret
end

@memoize function V(t::Int, agem::Int, edum::Int, θ::Vector{Float64})
    ret::Float64 = 0.0
    agef::Int = 0
    eduf::Int = 0
    if agem >= Tage
        ret = 0.0
    elseif t >= Ttime
        ret = 1.5
    else
        suma::Float64 = 0.0
        V1 = V(t+1, agem+1, edum, θ)
        wagem1 = wagem(t, agem, edum, θ)
        for agef in 16:Tage-1, eduf in 1:K
            suma += exp(s*z(t, agem, edum, agef, eduf, θ) + wagem1 + β*(s*(Z(t+1, agem+1, edum, agef+1, eduf, θ) - V1 - U(t+1, agef+1, eduf, θ)) + V1))
        end
        ret = log(
                exp(wagem1 + β*(V1)) + suma
                )
    end
    return ret
end

@memoize  function U(t::Int, agef::Int, eduf::Int, θ::Vector{Float64})
    ret::Float64 = 0.0
    agem::Int = 0
    edum::Int = 0
    if agef >= Tage
        ret = 0.0
    elseif t >= Ttime
        ret = 1.5
    else
        suma::Float64 = 0.0
        U1 = U(t+1, agef+1, eduf, θ)
        wagef1 = wagef(t, agef, eduf, θ)
        for agem in 16:Tage-1, edum in 1:K
            suma += exp((1-s)*z(t, agem, edum, agef, eduf, θ) + wagef1 + β*((1-s)*(Z(t+1, agem+1, edum, agef+1, eduf, θ) - V(t+1, agem+1, edum, θ) - U1) + U1))
        end
        ret = log(
                  exp(wagef1 + β*(U1)) + suma
                  )
    end  
    return ret
end

now for Tage = 60:

julia> @time U(1984, 17, 3, t0) # a test with const Tage = 60
  3.789108 seconds (27.08 M allocations: 494.135 MB, 15.12% gc time)
16.99266161490023

also I have tested an Int16 version:

julia> @time U(1984%Int16, 17%Int16, 3%Int16, t0)
  3.596871 seconds (27.05 M allocations: 413.808 MB, 11.93% gc time)
16.99266161490023

it runs 5% faster with 20% less memory allocation compare to Int32 version

Community
  • 1
  • 1
Reza Afzalan
  • 5,646
  • 3
  • 26
  • 44
  • Up to this moment, I have always believed that iteration does things better than recursion, but now I have doubts, therefore in my latest edit I remove a sentence that suggests using iteration instead of this recursive approach. Could anyone write a more efficient pure iterative approach for the above problem? – Reza Afzalan Nov 13 '15 at 16:03
  • Thanks a lot @reza. In a first version of it I had several variables as integers, I switched them all to Float64 because I read that Julia is faster when it does not have to convert between types, but I am not a computer person and the languages I have used in the past do not have explicit types (I'm talking Mathematica and other statistical packages). I will check this edits you did to my code. Just for the record, the system must have a solution since I have terminal conditions for t and ages. It's just a reverse recursion system. – amrods Nov 14 '15 at 05:01
  • can a problem like this be parallelized? – amrods Dec 02 '15 at 01:34