diff --git a/src/QuantEcon.jl b/src/QuantEcon.jl
index 053a8f50..cd12596d 100644
--- a/src/QuantEcon.jl
+++ b/src/QuantEcon.jl
@@ -130,7 +130,7 @@ export
     # ddp
     DiscreteDP, VFI, PFI, MPFI, solve, RQ_sigma,
     evaluate_policy, bellman_operator, compute_greedy,
-    bellman_operator!, compute_greedy!, num_states,
+    bellman_operator!, compute_greedy!, num_states, backward_induction,

     # zeros / optimization
     bisect, brenth, brent, ridder, expand_bracket, divide_bracket,
diff --git a/src/markov/ddp.jl b/src/markov/ddp.jl
index a171ab41..32bac277 100644
--- a/src/markov/ddp.jl
+++ b/src/markov/ddp.jl
@@ -8,7 +8,7 @@ Discrete Decision Processes

 References
 ----------
-https://lectures.quantecon.org/py/discrete_dp.html
+https://lectures.quantecon.org/jl/discrete_dp.html

 Notes
 -----
@@ -65,7 +65,7 @@ mutable struct DiscreteDP{T<:Real,NQ,NR,Tbeta<:Real,Tind,TQ<:AbstractArray{T,NQ}
             msg = "R must be 2-dimensional without s-a formulation"
             throw(ArgumentError(msg))
         end
-        (beta < 0 || beta >= 1) && throw(ArgumentError("beta must be [0, 1)"))
+        (beta < 0 || beta > 1) && throw(ArgumentError("beta must be [0, 1]"))

         # verify input integrity 2
         num_states, num_actions = size(R)
@@ -103,8 +103,10 @@ mutable struct DiscreteDP{T<:Real,NQ,NR,Tbeta<:Real,Tind,TQ<:AbstractArray{T,NQ}
         if NR != 1
             throw(ArgumentError("R must be 1-dimensional with s-a formulation"))
         end
-        (beta < 0 || beta >= 1) && throw(ArgumentError("beta must be [0, 1)"))
-
+        (beta < 0 || beta > 1) && throw(ArgumentError("beta must be [0, 1]"))
+        if beta == 1
+            @warn("infinite horizon solution methods are disabled with beta=1")
+        end
         # verify input integrity (same length)
         num_sa_pairs, num_states = size(Q)
         if length(R) != num_sa_pairs
@@ -224,7 +226,7 @@ This refers to the Value Iteration solution algorithm.

 References
 ----------
-https://lectures.quantecon.org/py/discrete_dp.html
+https://lectures.quantecon.org/jl/discrete_dp.html

 """
 struct VFI <: DDPAlgorithm end
@@ -235,9 +237,9 @@ This refers to the Policy Iteration solution algorithm.

 References
 ----------
-https://lectures.quantecon.org/py/discrete_dp.html
+https://lectures.quantecon.org/jl/discrete_dp.html

-"""
+"""
 struct PFI <: DDPAlgorithm end

 """
@@ -246,7 +248,7 @@ This refers to the Modified Policy Iteration solution algorithm.

 References
 ----------
-https://lectures.quantecon.org/py/discrete_dp.html
+https://lectures.quantecon.org/jl/discrete_dp.html

 """
 struct MPFI <: DDPAlgorithm end
@@ -453,6 +455,10 @@ Compute the value of a policy.

 """
 function evaluate_policy(ddp::DiscreteDP, sigma::AbstractVector{T}) where T<:Integer
+    if ddp.beta == 1.0
+        throw(ArgumentError("method invalid for beta = 1"))
+    end
+
     R_sigma, Q_sigma = RQ_sigma(ddp, sigma)
     b = R_sigma
     A = I - ddp.beta * Q_sigma
@@ -501,6 +507,60 @@ function solve(ddp::DiscreteDP{T}, v_init::AbstractVector{T},
     ddpr
 end

+"""
+    backward_induction(ddp, J[, v_term=zeros(num_states(ddp))])
+
+Solve by backward induction a ``J``-period finite horizon discrete dynamic
+program with stationary reward ``r`` and transition probability function ``q``
+and discount factor ``\\beta \\in [0, 1]``.
+
+The optimal value functions ``v^{\\ast}_1, \\ldots, v^{\\ast}_{J+1}`` and
+policy functions ``\\sigma^{\\ast}_1, \\ldots, \\sigma^{\\ast}_J`` are obtained
+by ``v^{\\ast}_{J+1} = v_{J+1}``, and
+
+```math
+v^{\\ast}_j(s) = \\max_{a \\in A(s)} r(s, a) +
+\\beta \\sum_{s' \\in S} q(s'|s, a) v^{\\ast}_{j+1}(s')
+\\quad (s \\in S)
+```
+and
+```math
+\\sigma^{\\ast}_j(s) \\in \\operatorname*{arg\\,max}_{a \\in A(s)}
+    r(s, a) + \\beta \\sum_{s' \\in S} q(s'|s, a) v^{\\ast}_{j+1}(s')
+    \\quad (s \\in S)
+```
+
+for ``j = J, \\ldots, 1``, where the terminal value function ``v_{J+1}`` is
+exogenously given by `v_term`.
+
+# Parameters
+
+- `ddp::DiscreteDP{T}` : Object that contains the Model Parameters
+- `J::Integer`: Number of decision periods
+- `v_term::AbstractVector{<:Real}=zeros(num_states(ddp))`: Terminal value
+  function of length equal to n (the number of states)
+
+# Returns
+
+- `vs::Matrix{S}`: Array of shape (n, J+1) where `vs[:,j]` contains the
+  optimal value function at period j = 1, ..., J+1.
+- `sigmas::Matrix{Int}`: Array of shape (n, J) where `sigmas[:,j]` contains the
+  optimal policy function at period j = 1, ..., J.
+"""
+function backward_induction(ddp::DiscreteDP{T}, J::Integer,
+                            v_term::AbstractVector{<:Real}=
+                                zeros(num_states(ddp))) where {T}
+    n = num_states(ddp)
+    S = typeof(zero(T)/one(T))
+    vs = Matrix{S}(undef, n, J+1)
+    vs[:, end] = v_term
+    sigmas = Matrix{Int}(undef, n, J)
+    @inbounds for j in J+1:-1:2
+        @views bellman_operator!(ddp, vs[:, j], vs[:, j-1], sigmas[:, j-1])
+    end
+    return vs, sigmas
+end
+
 # --------- #
 # Other API #
 # --------- #
@@ -777,6 +837,8 @@ function _solve!(
 )
     if ddp.beta == 0.0
         tol = Inf
+    elseif ddp.beta == 1.0
+        throw(ArgumentError("method invalid for beta = 1"))
     else
         tol = epsilon * (1-ddp.beta) / (2*ddp.beta)
     end
@@ -811,6 +873,10 @@ function _solve!(
 )
     old_sigma = copy(ddpr.sigma)

+    if ddp.beta == 1.0
+        throw(ArgumentError("method invalid for beta = 1"))
+    end
+
     for i in 1:max_iter
         ddpr.v = evaluate_policy(ddp, ddpr)
         compute_greedy!(ddp, ddpr)
@@ -836,6 +902,10 @@ function _solve!(
     ddp::DiscreteDP, ddpr::DPSolveResult{MPFI}, max_iter::Integer,
     epsilon::Real, k::Integer
 )
+    if ddp.beta == 1.0
+        throw(ArgumentError("method invalid for beta = 1"))
+    end
+
     beta = ddp.beta
     fill!(ddpr.v, minimum(ddp.R[ddp.R .> -Inf]) / (1.0 - beta))
     old_sigma = copy(ddpr.sigma)
diff --git a/test/test_ddp.jl b/test/test_ddp.jl
index 0db156dc..ec4fbf22 100644
--- a/test/test_ddp.jl
+++ b/test/test_ddp.jl
@@ -24,6 +24,7 @@ Tests for markov/ddp.jl
     Q[:, :, 2] = [0.5 1.0; 1.0 1.0]
     ddp0 = DiscreteDP(R, Q, beta)
+    ddp0_b1 = DiscreteDP(R, Q, 1.0)

     # Formulation with state-action pairs
     L = 3  # Number of state-action pairs
@@ -35,11 +36,13 @@ Tests for markov/ddp.jl
     Q_sa[2, :] = Q[1, 2, :]
     Q_sa[3, :] = Q[2, 1, :]
     ddp0_sa = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)
+    ddp0_sa_b1 = DiscreteDP(R_sa, Q_sa, 1.0, s_indices, a_indices)
     @test issparse(ddp0_sa.Q)

     # List of ddp formulations
     ddp0_collection = (ddp0, ddp0_sa)
+    ddp0_b1_collection = (ddp0_b1, ddp0_sa_b1)

     # Maximum Iteration and Epsilon for Tests
     max_iter = 200
@@ -86,6 +89,10 @@ Tests for markov/ddp.jl
         for ddp in ddp0_collection
             @test isapprox(evaluate_policy(ddp, sigma_star), v_star)
         end
+        # Check beta = 1.0 is not allowed
+        for ddp in ddp0_b1_collection
+            @test_throws ArgumentError evaluate_policy(ddp, sigma_star)
+        end
     end

     @testset "methods for subtypes != (Float64, Int)" begin
@@ -147,6 +154,10 @@ Tests for markov/ddp.jl
             @test res.sigma == sigma_star
             @test res_init.sigma == sigma_star
         end
+        # Check beta = 1.0 is not allowed
+        for ddp_item in ddp0_b1_collection
+            @test_throws ArgumentError solve(ddp_item, VFI)
+        end
     end

     @testset "policy_iteration" begin
@@ -164,6 +175,10 @@ Tests for markov/ddp.jl
             @test res.sigma == sigma_star
             @test res_init.sigma == sigma_star
         end
+        # Check beta = 1.0 is not allowed
+        for ddp_item in ddp0_b1_collection
+            @test_throws ArgumentError solve(ddp_item, PFI)
+        end
     end

     @testset "DiscreteDP{Rational,_,_,Rational} maintains Rational" begin
@@ -216,6 +231,56 @@ Tests for markov/ddp.jl
             # Check sigma == sigma_star
             @test res.sigma == sigma_star
         end
+        # Check beta = 1.0 is not allowed
+        for ddp_item in ddp0_b1_collection
+            @test_throws ArgumentError solve(ddp_item, MPFI)
+        end
+    end
+
+    @testset "Backward induction" begin
+        # From Puterman 2005, Section 3.2, Section 4.6.1
+        # "single-product stochastic inventory control"
+
+        # set up DDP constructor
+        s_indices = [1, 1, 1, 1, 2, 2, 2, 3, 3, 4]
+        a_indices = [1, 2, 3, 4, 1, 2, 3, 1, 2, 1]
+        R = [0//1, -1//1, -2//1, -5//1, 5//1, 0//1, -3//1, 6//1, -1//1, 5//1]
+        Q = [1//1 0//1 0//1 0//1;
+             3//4 1//4 0//1 0//1;
+             1//4 1//2 1//4 0//1;
+             0//1 1//4 1//2 1//4;
+             3//4 1//4 0//1 0//1;
+             1//4 1//2 1//4 0//1;
+             0//1 1//4 1//2 1//4;
+             1//4 1//2 1//4 0//1;
+             0//1 1//4 1//2 1//4;
+             0//1 1//4 1//2 1//4]
+        beta = 1
+        ddp_rational = DiscreteDP(R, Q, beta, s_indices, a_indices)
+        R = convert.(Float64, R)
+        Q = convert.(Float64, Q)
+        ddp_float = DiscreteDP(R, Q, beta, s_indices, a_indices)
+
+        # test for backward induction
+        J = 3
+        # expected results
+        vs_expected = [ 67//16      2  0  0;
+                       129//16  25//4  5  0;
+                       194//16     10  6  0;
+                       227//16  21//2  5  0]
+        sigmas_expected = [4  3  1;
+                           1  1  1;
+                           1  1  1;
+                           1  1  1]
+
+        vs, sigmas = backward_induction(ddp_rational, J)
+        @test vs == vs_expected
+        @test sigmas == sigmas_expected
+
+        vs, sigmas = backward_induction(ddp_float, J)
+        @test isapprox(vs, vs_expected)
+        @test sigmas == sigmas_expected
+    end

     @testset "DDPsa constructor" begin
@@ -231,9 +296,8 @@ Tests for markov/ddp.jl
         _s_ind = [1, 1, 2]
         _a_ind = [1, 2, 1]

-        @testset "beta in [0, 1)" begin
+        @testset "beta in [0, 1]" begin
             @test_throws ArgumentError DiscreteDP(_R, _Q, -eps(), _s_ind, _a_ind)
-            @test_throws ArgumentError DiscreteDP(_R, _Q, 1.0, _s_ind, _a_ind)
             @test_throws ArgumentError DiscreteDP(_R, _Q, 1+eps(), _s_ind, _a_ind)
         end
@@ -257,9 +321,8 @@ Tests for markov/ddp.jl
     end

     @testset "DDP constructor" begin
-        @testset "beta in [0, 1)" begin
+        @testset "beta in [0, 1]" begin
             @test_throws ArgumentError DiscreteDP(R, Q, -eps())
-            @test_throws ArgumentError DiscreteDP(R, Q, 1.0)
             @test_throws ArgumentError DiscreteDP(R, Q, 1+eps())
         end
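For reviewers, here is a minimal usage sketch of the new `backward_induction` API; it is not part of the patch, and the two-state rewards and transition probabilities below are invented purely for illustration. It builds a `DiscreteDP` with `beta = 1.0` and solves a three-period problem by backward induction.

```julia
using QuantEcon

# Two states, two actions: rewards R[s, a], transitions Q[s, a, s'].
# These numbers are made up for the sake of the example.
R = [5.0  1.0;
     2.0  0.0]
Q = zeros(2, 2, 2)
Q[1, 1, :] = [0.9, 0.1]
Q[1, 2, :] = [0.5, 0.5]
Q[2, 1, :] = [0.3, 0.7]
Q[2, 2, :] = [0.6, 0.4]

# beta = 1.0 is now accepted by the constructor; the infinite-horizon
# methods reject it, but finite-horizon backward induction is well defined.
ddp = DiscreteDP(R, Q, 1.0)

J = 3                                    # number of decision periods
vs, sigmas = backward_induction(ddp, J)  # vs is 2x(J+1), sigmas is 2xJ

# vs[:, 1] holds the optimal values at the first decision period and
# sigmas[:, 1] the corresponding optimal actions; the terminal value
# defaults to zeros(num_states(ddp)).
@show vs[:, 1] sigmas[:, 1]
```

Calling `solve(ddp, VFI)`, `solve(ddp, PFI)`, or `solve(ddp, MPFI)` on this `ddp` would throw an `ArgumentError`, per the checks added in `_solve!` above.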