Add backward induction and beta = 1 #172 #228

Merged: 15 commits, Jan 11, 2019
src/QuantEcon.jl: 2 changes (1 addition, 1 deletion)
@@ -130,7 +130,7 @@ export
# ddp
DiscreteDP, VFI, PFI, MPFI, solve, RQ_sigma,
evaluate_policy, bellman_operator, compute_greedy,
bellman_operator!, compute_greedy!, num_states,
bellman_operator!, compute_greedy!, num_states, backward_induction,

# zeros / optimization
bisect, brenth, brent, ridder, expand_bracket, divide_bracket,
src/markov/ddp.jl: 86 changes (78 additions, 8 deletions)
@@ -8,7 +8,7 @@ Discrete Decision Processes
References
----------

https://lectures.quantecon.org/py/discrete_dp.html
https://lectures.quantecon.org/jl/discrete_dp.html

Notes
-----
@@ -65,7 +65,7 @@ mutable struct DiscreteDP{T<:Real,NQ,NR,Tbeta<:Real,Tind,TQ<:AbstractArray{T,NQ}
msg = "R must be 2-dimensional without s-a formulation"
throw(ArgumentError(msg))
end
(beta < 0 || beta >= 1) && throw(ArgumentError("beta must be [0, 1)"))
(beta < 0 || beta > 1) && throw(ArgumentError("beta must be [0, 1]"))

# verify input integrity 2
num_states, num_actions = size(R)
@@ -103,8 +103,10 @@ mutable struct DiscreteDP{T<:Real,NQ,NR,Tbeta<:Real,Tind,TQ<:AbstractArray{T,NQ}
if NR != 1
throw(ArgumentError("R must be 1-dimensional with s-a formulation"))
end
(beta < 0 || beta >= 1) && throw(ArgumentError("beta must be [0, 1)"))

(beta < 0 || beta > 1) && throw(ArgumentError("beta must be [0, 1]"))
if beta == 1
@warn("infinite horizon solution methods are disabled with beta=1")
end
# verify input integrity (same length)
num_sa_pairs, num_states = size(Q)
if length(R) != num_sa_pairs
@@ -224,7 +226,7 @@ This refers to the Value Iteration solution algorithm.
References
----------

https://lectures.quantecon.org/py/discrete_dp.html
https://lectures.quantecon.org/jl/discrete_dp.html

"""
struct VFI <: DDPAlgorithm end
@@ -235,9 +237,9 @@ This refers to the Policy Iteration solution algorithm.
References
----------

https://lectures.quantecon.org/py/discrete_dp.html
https://lectures.quantecon.org/jl/discrete_dp.html

"""
"""
struct PFI <: DDPAlgorithm end

"""
@@ -246,7 +248,7 @@ This refers to the Modified Policy Iteration solution algorithm.
References
----------

https://lectures.quantecon.org/py/discrete_dp.html
https://lectures.quantecon.org/jl/discrete_dp.html

"""
struct MPFI <: DDPAlgorithm end
@@ -453,6 +455,10 @@ Compute the value of a policy.

"""
function evaluate_policy(ddp::DiscreteDP, sigma::AbstractVector{T}) where T<:Integer
if ddp.beta == 1.0
throw(ArgumentError("method invalid for beta = 1"))
end

R_sigma, Q_sigma = RQ_sigma(ddp, sigma)
b = R_sigma
A = I - ddp.beta * Q_sigma
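For context, `evaluate_policy` computes the value of a fixed policy ``\sigma`` by solving the linear system that the code above sets up (`A = I - ddp.beta * Q_sigma`, `b = R_sigma`):

```math
v_{\sigma} = r_{\sigma} + \beta Q_{\sigma} v_{\sigma}
\quad \Longleftrightarrow \quad
(I - \beta Q_{\sigma}) v_{\sigma} = r_{\sigma}.
```

At ``\beta = 1`` the matrix ``I - Q_{\sigma}`` is singular (``Q_{\sigma}`` is stochastic, so 1 is an eigenvalue) and the system has no unique solution, hence the new `ArgumentError` above.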
@@ -501,6 +507,60 @@ function solve(ddp::DiscreteDP{T}, v_init::AbstractVector{T},
ddpr
end

"""
backward_induction(ddp, J[, v_term=zeros(num_states(ddp))])

Solve by backward induction a ``J``-period finite horizon discrete dynamic
program with stationary reward ``r`` and transition probability functions ``q``
and discount factor ``\\beta \\in [0, 1]``.

The optimal value functions ``v^{\\ast}_1, \\ldots, v^{\\ast}_{J+1}`` and
policy functions ``\\sigma^{\\ast}_1, \\ldots, \\sigma^{\\ast}_J`` are obtained
by ``v^{\\ast}_{J+1} = v_{J+1}``, and

```math
v^{\\ast}_j(s) = \\max_{a \\in A(s)} r(s, a) +
\\beta \\sum_{s' \\in S} q(s'|s, a) v^{\\ast}_{j+1}(s')
\\quad (s \\in S)
```
and
```math
\\sigma^{\\ast}_j(s) \\in \\operatorname*{arg\\,max}_{a \\in A(s)}
r(s, a) + \\beta \\sum_{s' \\in S} q(s'|s, a) v^*_{j+1}(s')
\\quad (s \\in S)
```

for ``j= J, \\ldots, 1``, where the terminal value function ``v_{J+1}`` is
exogenously given by `v_term`.

# Parameters

- `ddp::DiscreteDP{T}` : Object that contains the Model Parameters
- `J::Integer`: Number of decision periods
- `v_term::AbstractVector{<:Real}=zeros(num_states(ddp))`: Terminal value
function of length equal to n (the number of states)

# Returns

- `vs::Matrix{S}`: Array of shape (n, J+1) where `vs[:,j]` contains the
optimal value function at period j = 1, ..., J+1.
- `sigmas::Matrix{Int}`: Array of shape (n, J) where `sigmas[:,j]` contains the
optimal policy function at period j = 1, ..., J.
"""
function backward_induction(ddp::DiscreteDP{T}, J::Integer,
v_term::AbstractVector{<:Real}=
zeros(num_states(ddp))) where {T}
n = num_states(ddp)
S = typeof(zero(T)/one(T))
vs = Matrix{S}(undef, n, J+1)
vs[:,end] = v_term
sigmas = Matrix{Int}(undef, n, J)
@inbounds for j in J+1:-1:2
@views bellman_operator!(ddp, vs[:,j], vs[:,j-1], sigmas[:,j-1])
end
return vs, sigmas
end
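A minimal usage sketch of the new function (the model values below are illustrative, not taken from this PR): a two-state, two-action DDP with ``\beta = 1`` solved over a three-period horizon.

```julia
using QuantEcon

# Illustrative two-state, two-action model
# R[s, a]: reward for action a in state s (rows are states, columns are actions)
R = [5.0 10.0;
     -1.0 2.0]
Q = Array{Float64}(undef, 2, 2, 2)  # Q[s, a, s']: transition probabilities
Q[:, 1, :] = [0.5 0.5; 0.0 1.0]     # transitions under action 1
Q[:, 2, :] = [0.0 1.0; 0.5 0.5]     # transitions under action 2
ddp = DiscreteDP(R, Q, 1.0)         # beta = 1 is accepted after this PR

J = 3
vs, sigmas = backward_induction(ddp, J)  # v_term defaults to zeros(num_states(ddp))
# size(vs) == (2, J+1): optimal values at periods 1, ..., J+1
# size(sigmas) == (2, J): optimal policies at periods 1, ..., J
```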

# --------- #
# Other API #
# --------- #
@@ -777,6 +837,8 @@ function _solve!(
)
if ddp.beta == 0.0
tol = Inf
elseif ddp.beta == 1.0
throw(ArgumentError("method invalid for beta = 1"))
else
tol = epsilon * (1-ddp.beta) / (2*ddp.beta)
end
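For reference, the tolerance set in the VFI branch above is the standard ``\varepsilon``-optimality stopping rule for value iteration (see, e.g., Puterman 2005): once successive iterates are within ``\varepsilon(1-\beta)/(2\beta)`` of each other in sup norm, the greedy policy with respect to the latest iterate is ``\varepsilon``-optimal,

```math
\lVert v_{k+1} - v_k \rVert_{\infty} < \frac{\varepsilon (1-\beta)}{2\beta}
\;\Longrightarrow\;
\lVert v_{\sigma_{k+1}} - v^{\ast} \rVert_{\infty} < \varepsilon,
```

where ``\sigma_{k+1}`` is the ``v_{k+1}``-greedy policy. The bound shrinks to zero as ``\beta \to 1``, which is one reason VFI, like the other infinite-horizon methods, is disabled at ``\beta = 1``.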
@@ -811,6 +873,10 @@ function _solve!(
)
old_sigma = copy(ddpr.sigma)

if ddp.beta == 1.0
throw(ArgumentError("method invalid for beta = 1"))
end

for i in 1:max_iter
ddpr.v = evaluate_policy(ddp, ddpr)
compute_greedy!(ddp, ddpr)
@@ -836,6 +902,10 @@
ddp::DiscreteDP, ddpr::DPSolveResult{MPFI}, max_iter::Integer,
epsilon::Real, k::Integer
)
if ddp.beta == 1.0
throw(ArgumentError("method invalid for beta = 1"))
end

beta = ddp.beta
fill!(ddpr.v, minimum(ddp.R[ddp.R .> -Inf]) / (1.0 - beta))
old_sigma = copy(ddpr.sigma)
test/test_ddp.jl: 71 changes (67 additions, 4 deletions)
@@ -24,6 +24,7 @@ Tests for markov/ddp.jl
Q[:, :, 2] = [0.5 1.0; 1.0 1.0]

ddp0 = DiscreteDP(R, Q, beta)
ddp0_b1 = DiscreteDP(R, Q, 1.0)

# Formulation with state-action pairs
L = 3 # Number of state-action pairs
@@ -35,11 +36,13 @@ Tests for markov/ddp.jl
Q_sa[2, :] = Q[1, 2, :]
Q_sa[3, :] = Q[2, 1, :]
ddp0_sa = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)
ddp0_sa_b1 = DiscreteDP(R_sa, Q_sa, 1.0, s_indices, a_indices)

@test issparse(ddp0_sa.Q)

# List of ddp formulations
ddp0_collection = (ddp0, ddp0_sa)
ddp0_b1_collection = (ddp0_b1, ddp0_sa_b1)

# Maximum Iteration and Epsilon for Tests
max_iter = 200
@@ -86,6 +89,10 @@ Tests for markov/ddp.jl
for ddp in ddp0_collection
@test isapprox(evaluate_policy(ddp, sigma_star), v_star)
end
# Check beta = 1.0 is not allowed
for ddp in ddp0_b1_collection
@test_throws ArgumentError evaluate_policy(ddp, sigma_star)
end
end

@testset "methods for subtypes != (Float64, Int)" begin
@@ -147,6 +154,10 @@ Tests for markov/ddp.jl
@test res.sigma == sigma_star
@test res_init.sigma == sigma_star
end
# Check beta = 1.0 is not allowed
for ddp_item in ddp0_b1_collection
@test_throws ArgumentError solve(ddp_item, VFI)
end
end

@testset "policy_iteration" begin
@@ -164,6 +175,10 @@ Tests for markov/ddp.jl
@test res.sigma == sigma_star
@test res_init.sigma == sigma_star
end
# Check beta = 1.0 is not allowed
for ddp_item in ddp0_b1_collection
@test_throws ArgumentError solve(ddp_item, PFI)
end
end

@testset "DiscreteDP{Rational,_,_,Rational} maintains Rational" begin
@@ -216,6 +231,56 @@ Tests for markov/ddp.jl
# Check sigma == sigma_star
@test res.sigma == sigma_star
end
# Check beta = 1.0 is not allowed
for ddp_item in ddp0_b1_collection
@test_throws ArgumentError solve(ddp_item, MPFI)
end
end

@testset "Backward induction" begin
# From Puterman 2005, Section 3.2, Section 4.6.1
# "single-product stochastic inventory control"

# set up DDP constructor
s_indices = [1, 1, 1, 1, 2, 2, 2, 3, 3, 4]
a_indices = [1, 2, 3, 4, 1, 2, 3, 1, 2, 1]
R = [ 0//1, -1//1, -2//1, -5//1, 5//1, 0//1, -3//1, 6//1, -1//1, 5//1]
Q = [ 1//1 0//1 0//1 0//1;
3//4 1//4 0//1 0//1;
1//4 1//2 1//4 0//1;
0//1 1//4 1//2 1//4;
3//4 1//4 0//1 0//1;
1//4 1//2 1//4 0//1;
0//1 1//4 1//2 1//4;
1//4 1//2 1//4 0//1;
0//1 1//4 1//2 1//4;
0//1 1//4 1//2 1//4]
beta = 1
ddp_rational = DiscreteDP(R, Q, beta, s_indices, a_indices)
R = convert.(Float64, R)
Q = convert.(Float64, Q)
ddp_float = DiscreteDP(R, Q, beta, s_indices, a_indices)

# test for backward induction
J = 3
# expected results
vs_expected = [67//16 2 0 0;
129//16 25//4 5 0;
194//16 10 6 0;
227//16 21//2 5 0]
sigmas_expected = [4 3 1;
1 1 1;
1 1 1;
1 1 1]

vs, sigmas = backward_induction(ddp_rational, J)
@test vs == vs_expected
@test sigmas == sigmas_expected

vs, sigmas = backward_induction(ddp_float, J)
@test isapprox(vs, vs_expected)
@test sigmas == sigmas_expected

end

@testset "DDPsa constructor" begin
@@ -231,9 +296,8 @@ Tests for markov/ddp.jl
_s_ind = [1, 1, 2]
_a_ind = [1, 2, 1]

@testset "beta in [0, 1)" begin
@testset "beta in [0, 1]" begin
@test_throws ArgumentError DiscreteDP(_R, _Q, -eps(), _s_ind, _a_ind)
@test_throws ArgumentError DiscreteDP(_R, _Q, 1.0, _s_ind, _a_ind)
@test_throws ArgumentError DiscreteDP(_R, _Q, 1+eps(), _s_ind, _a_ind)
end

@@ -257,9 +321,8 @@ Tests for markov/ddp.jl
end

@testset "DDP constructor" begin
@testset "beta in [0, 1)" begin
@testset "beta in [0, 1]" begin
@test_throws ArgumentError DiscreteDP(R, Q, -eps())
@test_throws ArgumentError DiscreteDP(R, Q, 1.0)
@test_throws ArgumentError DiscreteDP(R, Q, 1+eps())
end
