Add backward induction and beta = 1 #172 #228

Merged: 15 commits, Jan 11, 2019
src/QuantEcon.jl: 2 changes (1 addition, 1 deletion)
@@ -130,7 +130,7 @@ export
# ddp
DiscreteDP, VFI, PFI, MPFI, solve, RQ_sigma,
evaluate_policy, bellman_operator, compute_greedy,
bellman_operator!, compute_greedy!, num_states,
bellman_operator!, compute_greedy!, num_states, backward_induction,

# zeros / optimization
bisect, brenth, brent, ridder, expand_bracket, divide_bracket,
src/markov/ddp.jl: 86 changes (78 additions, 8 deletions)
@@ -8,7 +8,7 @@ Discrete Decision Processes
References
----------

https://lectures.quantecon.org/py/discrete_dp.html
https://lectures.quantecon.org/jl/discrete_dp.html

Notes
-----
@@ -65,7 +65,7 @@ mutable struct DiscreteDP{T<:Real,NQ,NR,Tbeta<:Real,Tind,TQ<:AbstractArray{T,NQ}
msg = "R must be 2-dimensional without s-a formulation"
throw(ArgumentError(msg))
end
(beta < 0 || beta >= 1) && throw(ArgumentError("beta must be [0, 1)"))
(beta < 0 || beta > 1) && throw(ArgumentError("beta must be [0, 1]"))

# verify input integrity 2
num_states, num_actions = size(R)
@@ -103,8 +103,10 @@ mutable struct DiscreteDP{T<:Real,NQ,NR,Tbeta<:Real,Tind,TQ<:AbstractArray{T,NQ}
if NR != 1
throw(ArgumentError("R must be 1-dimensional with s-a formulation"))
end
(beta < 0 || beta >= 1) && throw(ArgumentError("beta must be [0, 1)"))

(beta < 0 || beta > 1) && throw(ArgumentError("beta must be [0, 1]"))
if beta == 1
@warn("infinite horizon solution methods are disabled with beta=1")
end
# verify input integrity (same length)
num_sa_pairs, num_states = size(Q)
if length(R) != num_sa_pairs
@@ -224,7 +226,7 @@ This refers to the Value Iteration solution algorithm.
References
----------

https://lectures.quantecon.org/py/discrete_dp.html
https://lectures.quantecon.org/jl/discrete_dp.html

"""
struct VFI <: DDPAlgorithm end
@@ -235,9 +237,9 @@ This refers to the Policy Iteration solution algorithm.
References
----------

https://lectures.quantecon.org/py/discrete_dp.html
https://lectures.quantecon.org/jl/discrete_dp.html

"""
"""
struct PFI <: DDPAlgorithm end

"""
@@ -246,7 +248,7 @@ This refers to the Modified Policy Iteration solution algorithm.
References
----------

https://lectures.quantecon.org/py/discrete_dp.html
https://lectures.quantecon.org/jl/discrete_dp.html

"""
struct MPFI <: DDPAlgorithm end
@@ -453,6 +455,10 @@ Compute the value of a policy.

"""
function evaluate_policy(ddp::DiscreteDP, sigma::AbstractVector{T}) where T<:Integer
if ddp.beta == 1.0
throw(ArgumentError("method invalid for beta = 1"))
end

R_sigma, Q_sigma = RQ_sigma(ddp, sigma)
b = R_sigma
A = I - ddp.beta * Q_sigma
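For context, `evaluate_policy` computes the value of a fixed policy ``\sigma`` by solving the linear system that the code above sets up (`A = I - ddp.beta * Q_sigma`, `b = R_sigma`):

```math
v_{\sigma} = r_{\sigma} + \beta Q_{\sigma} v_{\sigma}
\quad \Longleftrightarrow \quad
(I - \beta Q_{\sigma}) v_{\sigma} = r_{\sigma}.
```

At ``\beta = 1`` the matrix ``I - Q_{\sigma}`` is singular (``Q_{\sigma}`` is stochastic, so 1 is an eigenvalue) and the system has no unique solution, hence the new `ArgumentError` above.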
@@ -501,6 +507,60 @@ function solve(ddp::DiscreteDP{T}, v_init::AbstractVector{T},
ddpr
end

"""
backward_induction(ddp, J[, v_term=zeros(num_states(ddp))])

Solve by backward induction a ``J``-period finite horizon discrete dynamic
program with stationary reward ``r`` and transition probability functions ``q``
and discount factor ``\\beta \\in [0, 1]``.

The optimal value functions ``v^{\\ast}_1, \\ldots, v^{\\ast}_{J+1}`` and
policy functions ``\\sigma^{\\ast}_1, \\ldots, \\sigma^{\\ast}_J`` are obtained
by ``v^{\\ast}_{J+1} = v_{J+1}``, and

```math
v^{\\ast}_j(s) = \\max_{a \\in A(s)} r(s, a) +
\\beta \\sum_{s' \\in S} q(s'|s, a) v^{\\ast}_{j+1}(s')
\\quad (s \\in S)
```
and
```math
\\sigma^{\\ast}_j(s) \\in \\operatorname*{arg\\,max}_{a \\in A(s)}
r(s, a) + \\beta \\sum_{s' \\in S} q(s'|s, a) v^*_{j+1}(s')
\\quad (s \\in S)
```

for ``j= J, \\ldots, 1``, where the terminal value function ``v_{J+1}`` is
exogenously given by `v_term`.

# Parameters

- `ddp::DiscreteDP{T}` : Object that contains the Model Parameters
- `J::Integer`: Number of decision periods
- `v_term::AbstractVector{<:Real}=zeros(num_states(ddp))`: Terminal value
function of length equal to n (the number of states)

# Returns

- `vs::Matrix{S}`: Array of shape (n, J+1) where `vs[:,j]` contains the
optimal value function at period j = 1, ..., J+1.
- `sigmas::Matrix{Int}`: Array of shape (n, J) where `sigmas[:,j]` contains the
optimal policy function at period j = 1, ..., J.
"""
function backward_induction(ddp::DiscreteDP{T}, J::Integer,
v_term::AbstractVector{<:Real}=
zeros(num_states(ddp))) where {T}
n = num_states(ddp)
S = typeof(zero(T)/one(T))
vs = Matrix{S}(undef, n, J+1)
vs[:,end] = v_term
sigmas = Matrix{Int}(undef, n, J)
@inbounds for j in J+1:-1:2
@views bellman_operator!(ddp, vs[:,j], vs[:,j-1], sigmas[:,j-1])
end
return vs, sigmas
end
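A minimal usage sketch of the new function (the model values below are illustrative, not taken from this PR): a two-state, two-action DDP with ``\beta = 1`` solved over a three-period horizon.

```julia
using QuantEcon

# Illustrative two-state, two-action model
# R[s, a]: reward for action a in state s (rows are states, columns are actions)
R = [5.0 10.0;
     -1.0 2.0]
Q = Array{Float64}(undef, 2, 2, 2)  # Q[s, a, s']: transition probabilities
Q[:, 1, :] = [0.5 0.5; 0.0 1.0]     # transitions under action 1
Q[:, 2, :] = [0.0 1.0; 0.5 0.5]     # transitions under action 2
ddp = DiscreteDP(R, Q, 1.0)         # beta = 1 is accepted after this PR

J = 3
vs, sigmas = backward_induction(ddp, J)  # v_term defaults to zeros(num_states(ddp))
# size(vs) == (2, J+1): optimal values at periods 1, ..., J+1
# size(sigmas) == (2, J): optimal policies at periods 1, ..., J
```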

# --------- #
# Other API #
# --------- #
@@ -777,6 +837,8 @@ function _solve!(
)
if ddp.beta == 0.0
tol = Inf
elseif ddp.beta == 1.0
throw(ArgumentError("method invalid for beta = 1"))
else
tol = epsilon * (1-ddp.beta) / (2*ddp.beta)
end
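For reference, the tolerance set in the VFI branch above is the standard ``\varepsilon``-optimality stopping rule for value iteration (see, e.g., Puterman 2005): once successive iterates are within ``\varepsilon(1-\beta)/(2\beta)`` of each other in sup norm, the greedy policy with respect to the latest iterate is ``\varepsilon``-optimal,

```math
\lVert v_{k+1} - v_k \rVert_{\infty} < \frac{\varepsilon (1-\beta)}{2\beta}
\;\Longrightarrow\;
\lVert v_{\sigma_{k+1}} - v^{\ast} \rVert_{\infty} < \varepsilon,
```

where ``\sigma_{k+1}`` is the ``v_{k+1}``-greedy policy. The bound shrinks to zero as ``\beta \to 1``, which is one reason VFI, like the other infinite-horizon methods, is disabled at ``\beta = 1``.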
@@ -811,6 +873,10 @@ function _solve!(
)
old_sigma = copy(ddpr.sigma)

if ddp.beta == 1.0
throw(ArgumentError("method invalid for beta = 1"))
end

for i in 1:max_iter
ddpr.v = evaluate_policy(ddp, ddpr)
compute_greedy!(ddp, ddpr)
@@ -836,6 +902,10 @@
ddp::DiscreteDP, ddpr::DPSolveResult{MPFI}, max_iter::Integer,
epsilon::Real, k::Integer
)
if ddp.beta == 1.0
throw(ArgumentError("method invalid for beta = 1"))
end

beta = ddp.beta
fill!(ddpr.v, minimum(ddp.R[ddp.R .> -Inf]) / (1.0 - beta))
old_sigma = copy(ddpr.sigma)
test/test_ddp.jl: 71 changes (67 additions, 4 deletions)
@@ -24,6 +24,7 @@ Tests for markov/ddp.jl
Q[:, :, 2] = [0.5 1.0; 1.0 1.0]

ddp0 = DiscreteDP(R, Q, beta)
ddp0_b1 = DiscreteDP(R, Q, 1.0)

# Formulation with state-action pairs
L = 3 # Number of state-action pairs
@@ -35,11 +36,13 @@ Tests for markov/ddp.jl
Q_sa[2, :] = Q[1, 2, :]
Q_sa[3, :] = Q[2, 1, :]
ddp0_sa = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)
ddp0_sa_b1 = DiscreteDP(R_sa, Q_sa, 1.0, s_indices, a_indices)

@test issparse(ddp0_sa.Q)

# List of ddp formulations
ddp0_collection = (ddp0, ddp0_sa)
ddp0_b1_collection = (ddp0_b1, ddp0_sa_b1)

# Maximum Iteration and Epsilon for Tests
max_iter = 200
@@ -86,6 +89,10 @@ Tests for markov/ddp.jl
for ddp in ddp0_collection
@test isapprox(evaluate_policy(ddp, sigma_star), v_star)
end
# Check beta = 1.0 is not allowed
for ddp in ddp0_b1_collection
@test_throws ArgumentError evaluate_policy(ddp, sigma_star)
end
end

@testset "methods for subtypes != (Float64, Int)" begin
@@ -147,6 +154,10 @@ Tests for markov/ddp.jl
@test res.sigma == sigma_star
@test res_init.sigma == sigma_star
end
# Check beta = 1.0 is not allowed
for ddp_item in ddp0_b1_collection
@test_throws ArgumentError solve(ddp_item, VFI)
end
end

@testset "policy_iteration" begin
@@ -164,6 +175,10 @@ Tests for markov/ddp.jl
@test res.sigma == sigma_star
@test res_init.sigma == sigma_star
end
# Check beta = 1.0 is not allowed
for ddp_item in ddp0_b1_collection
@test_throws ArgumentError solve(ddp_item, PFI)
end
end

@testset "DiscreteDP{Rational,_,_,Rational} maintains Rational" begin
@@ -216,6 +231,56 @@ Tests for markov/ddp.jl
# Check sigma == sigma_star
@test res.sigma == sigma_star
end
# Check beta = 1.0 is not allowed
for ddp_item in ddp0_b1_collection
@test_throws ArgumentError solve(ddp_item, MPFI)
end
end

@testset "Backward induction" begin
# From Puterman 2005, Section 3.2, Section 4.6.1
# "single-product stochastic inventory control"

# set up DDP constructor
s_indices = [1, 1, 1, 1, 2, 2, 2, 3, 3, 4]
a_indices = [1, 2, 3, 4, 1, 2, 3, 1, 2, 1]
R = [ 0//1, -1//1, -2//1, -5//1, 5//1, 0//1, -3//1, 6//1, -1//1, 5//1]
Q = [ 1//1 0//1 0//1 0//1;
3//4 1//4 0//1 0//1;
1//4 1//2 1//4 0//1;
0//1 1//4 1//2 1//4;
3//4 1//4 0//1 0//1;
1//4 1//2 1//4 0//1;
0//1 1//4 1//2 1//4;
1//4 1//2 1//4 0//1;
0//1 1//4 1//2 1//4;
0//1 1//4 1//2 1//4]
beta = 1
ddp_rational = DiscreteDP(R, Q, beta, s_indices, a_indices)
R = convert.(Float64, R)
Q = convert.(Float64, Q)
ddp_float = DiscreteDP(R, Q, beta, s_indices, a_indices)

# test for backward induction
J = 3
# expected results
vs_expected = [67//16 2 0 0;
129//16 25//4 5 0;
194//16 10 6 0;
227//16 21//2 5 0]
sigmas_expected = [4 3 1;
1 1 1;
1 1 1;
1 1 1]

vs, sigmas = backward_induction(ddp_rational, J)
@test vs == vs_expected
@test sigmas == sigmas_expected

vs, sigmas = backward_induction(ddp_float, J)
@test isapprox(vs, vs_expected)
@test sigmas == sigmas_expected

end

@testset "DDPsa constructor" begin
@@ -231,9 +296,8 @@ Tests for markov/ddp.jl
_s_ind = [1, 1, 2]
_a_ind = [1, 2, 1]

@testset "beta in [0, 1)" begin
@testset "beta in [0, 1]" begin
@test_throws ArgumentError DiscreteDP(_R, _Q, -eps(), _s_ind, _a_ind)
@test_throws ArgumentError DiscreteDP(_R, _Q, 1.0, _s_ind, _a_ind)
@test_throws ArgumentError DiscreteDP(_R, _Q, 1+eps(), _s_ind, _a_ind)
end

@@ -257,9 +321,8 @@ Tests for markov/ddp.jl
end

@testset "DDP constructor" begin
@testset "beta in [0, 1)" begin
@testset "beta in [0, 1]" begin
@test_throws ArgumentError DiscreteDP(R, Q, -eps())
@test_throws ArgumentError DiscreteDP(R, Q, 1.0)
@test_throws ArgumentError DiscreteDP(R, Q, 1+eps())
end
