Skip to content

Commit

Permalink
Refactor the optional-argument and Regression interfaces.
Browse files Browse the repository at this point in the history
- Renamed spec type to opt.
- Created special constructors for regression models to hide the
  optional arguments. Each type is now defined only in its own module
  and no longer pollutes the main Regression module.
- Changed to using polymorphic variants to express regularizers.
  • Loading branch information
rleonid committed Jan 29, 2016
1 parent ad7e126 commit d1edf4b
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 134 deletions.
40 changes: 20 additions & 20 deletions src/lib/classify.ml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ module type Classifier_intf = sig
val eval : t -> feature -> clas probabilities

type samples = (clas * feature) list
val estimate : ?spec:spec -> ?classes:clas list -> samples -> t
val estimate : ?opt:opt -> ?classes:clas list -> samples -> t
end

module type Generative_intf = sig
Expand Down Expand Up @@ -136,7 +136,7 @@ let estimate_naive_bayes modulename (type c) init update incorporate
module BinomialNaiveBayes(Data: Dummy_encoded_data_intf)
: (Generative_intf with type feature = Data.feature
and type clas = Data.clas
and type spec = binomial_spec)
and type opt = binomial_spec)
= struct

type feature = Data.feature
Expand Down Expand Up @@ -174,12 +174,12 @@ module BinomialNaiveBayes(Data: Dummy_encoded_data_intf)
in
eval_naive_bayes ~to_prior ~to_likelihood nb.table

type spec = binomial_spec
type opt = binomial_spec
let default = { smoothing = 0.0; bernoulli = false }

module Cm = Map.Make(struct type t = clas let compare = compare end)

let estimate ?(spec=default) ?classes data =
let estimate ?(opt=default) ?classes data =
let aa = Data.size + 1 in
let init _cls = Array.make aa 0 in
let update arr ftr =
Expand All @@ -190,7 +190,7 @@ module BinomialNaiveBayes(Data: Dummy_encoded_data_intf)
arr
in
let incorporate all num_classes totalf =
let to_prob = smoothing_to_prob spec.smoothing in
let to_prob = smoothing_to_prob opt.smoothing in
List.map all ~f:(fun (cl, attr_count) ->
let prior_count = float attr_count.(Data.size) in
let likelihood =
Expand All @@ -206,7 +206,7 @@ module BinomialNaiveBayes(Data: Dummy_encoded_data_intf)
estimate_naive_bayes "BinomialNaiveBayes"
init update incorporate (module Cm) ?classes data
in
{table ; e_bernoulli = spec.bernoulli}
{table ; e_bernoulli = opt.bernoulli}

let class_probabilities nb cls =
let arr = List.assoc cls nb.table in
Expand Down Expand Up @@ -235,7 +235,7 @@ type smoothing = float
module CategoricalNaiveBayes(Data: Category_encoded_data_intf)
: (Generative_intf with type feature = Data.feature
and type clas = Data.clas
and type spec = smoothing)
and type opt = smoothing)

= struct

Expand Down Expand Up @@ -274,20 +274,20 @@ module CategoricalNaiveBayes(Data: Category_encoded_data_intf)
in
eval_naive_bayes ~to_prior ~to_likelihood table

type spec = smoothing
type opt = smoothing
let default = 0.0

module Cm = Map.Make(struct type t = clas let compare = compare end)

let estimate ?(spec=default) =
let estimate ?(opt=default) =
let init _ = (0, Array.map (fun i -> Array.make i 0) Data.encoding_sizes) in
let update (c, arr) ftr =
let ftr_arr = safe_encoding ftr in
Array.iteri (fun i j -> arr.(i).(j) <- arr.(i).(j) + 1) ftr_arr;
(c + 1, arr)
in
let incorporate all num_classes totalf =
let to_prob = smoothing_to_prob spec in
let to_prob = smoothing_to_prob opt in
List.map all ~f:(fun (cl, (class_count, attr_count)) ->
let prior = to_prob (float class_count) totalf (float num_classes) in
let likelihood =
Expand Down Expand Up @@ -322,7 +322,7 @@ let to_safe_encoding_size_checked interfacename size encoding f =
module GaussianNaiveBayes(Data: Continuous_encoded_data_intf)
: (Generative_intf with type feature = Data.feature
and type clas = Data.clas
and type spec = unit)
and type opt = unit)

= struct

Expand Down Expand Up @@ -353,13 +353,13 @@ module GaussianNaiveBayes(Data: Continuous_encoded_data_intf)
in
eval_naive_bayes ~to_prior ~to_likelihood table

type spec = unit
type opt = unit
let default = ()

module Cm = Map.Make(struct type t = clas let compare = compare end)

let estimate ?(spec=default) =
ignore spec;
let estimate ?(opt=default) =
ignore opt;
let init _c = (0, Array.make Data.size Running.empty) in
let update (c, rs_arr) ftr =
let attr = safe_encoding ftr in
Expand Down Expand Up @@ -396,7 +396,7 @@ module LrCommon(Data: Continuous_encoded_data_intf) = struct

type samples = (clas * feature) list

type spec = log_reg_spec
type opt = log_reg_spec
let default = { lambda = 1e-4
; tolerance = 1e4
}
Expand All @@ -408,7 +408,7 @@ module LrCommon(Data: Continuous_encoded_data_intf) = struct
let copy1 arr = Array.init (Data.size + 1) (function | 0 -> 1. | i -> arr.(i - 1))

(* map classes to [1;2 ... 3], convert features to matrix and run Softmax *)
let estimate ~method_name ~class_bound ~to_t ?(spec=default) ?(classes=[]) data =
let estimate ~method_name ~class_bound ~to_t ?(opt=default) ?(classes=[]) data =
let class_bound =
match class_bound with
| None -> fun n -> n
Expand Down Expand Up @@ -444,8 +444,8 @@ module LrCommon(Data: Continuous_encoded_data_intf) = struct
in
let weights =
Softmax_regression.regress
~lambda:spec.lambda
~tolerance:spec.tolerance
~lambda:opt.lambda
~tolerance:opt.tolerance
ftrs classes
in
let sortedc =
Expand All @@ -461,7 +461,7 @@ module LogisticRegression(Data: Continuous_encoded_data_intf)
: sig
include Classifier_intf with type feature = Data.feature
and type clas = Data.clas
and type spec = log_reg_spec
and type opt = log_reg_spec

val coefficients : t -> float array

Expand Down Expand Up @@ -504,7 +504,7 @@ module MulticlassLogisticRegression(Data: Continuous_encoded_data_intf)
: sig
include Classifier_intf with type feature = Data.feature
and type clas = Data.clas
and type spec = log_reg_spec
and type opt = log_reg_spec

val coefficients : t -> float array array

Expand Down
16 changes: 8 additions & 8 deletions src/lib/classify.mli
Original file line number Diff line number Diff line change
Expand Up @@ -109,22 +109,22 @@ module type Classifier_intf = sig
(** Representing training data. *)
type samples = (clas * feature) list

(** [estimate spec classes samples] estimates a classifier based upon the
(** [estimate opt classes samples] estimates a classifier based upon the
training [samples].
[classes] is an optional argument to specify ahead of time the possible
classes to train on (defaults to the ones found in the training data).
This is useful for models where we know the population domain but may
not see an example of a training datum for rare cases.
[spec] are the optional classifier dependent estimation/evaluation
[opt] holds the optional, classifier-dependent estimation/evaluation
arguments.
@raise Invalid_argument if [classes] are specified and new ones are
found in the training [samples].
@raise Invalid_argument if [samples] is empty.
*)
val estimate : ?spec:spec -> ?classes:clas list -> samples -> t
val estimate : ?opt:opt -> ?classes:clas list -> samples -> t
end

(** A generative classifier builds models of the form
Expand Down Expand Up @@ -172,7 +172,7 @@ type binomial_spec =
classifier on data encoded using
{{!modtype:Dummy_encoded_data_intf}Dummy variables.} *)
module BinomialNaiveBayes(D: Dummy_encoded_data_intf) :
Generative_intf with type spec = binomial_spec
Generative_intf with type opt = binomial_spec
and type feature = D.feature
and type clas = D.clas

Expand All @@ -181,7 +181,7 @@ module BinomialNaiveBayes(D: Dummy_encoded_data_intf) :
classifier on data encoded using
{{!modtype:Category_encoded_data_intf}Categorical variables.} *)
module CategoricalNaiveBayes(D: Category_encoded_data_intf) :
Generative_intf with type spec = smoothing
Generative_intf with type opt = smoothing
and type feature = D.feature
and type clas = D.clas

Expand All @@ -191,7 +191,7 @@ module CategoricalNaiveBayes(D: Category_encoded_data_intf) :
for each of the quantitative features in the
{{!modtype:Continuous_encoded_data_intf}encoded data}. *)
module GaussianNaiveBayes(D: Continuous_encoded_data_intf) :
Generative_intf with type spec = unit
Generative_intf with type opt = unit
and type feature = D.feature
and type clas = D.clas

Expand Down Expand Up @@ -230,7 +230,7 @@ type log_reg_spec =
*)
module LogisticRegression(D: Continuous_encoded_data_intf) :
sig
include Classifier_intf with type spec = log_reg_spec
include Classifier_intf with type opt = log_reg_spec
and type feature = D.feature
and type clas = D.clas

Expand Down Expand Up @@ -259,7 +259,7 @@ module LogisticRegression(D: Continuous_encoded_data_intf) :
*)
module MulticlassLogisticRegression(D: Continuous_encoded_data_intf) :
sig
include Classifier_intf with type spec = log_reg_spec
include Classifier_intf with type opt = log_reg_spec
and type feature = D.feature
and type clas = D.clas

Expand Down
2 changes: 1 addition & 1 deletion src/lib/classify.mlt
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ let () =
let size = 5
end)
in
let naiveb = NB.estimate ~spec:{NB.default with bernoulli = true } data in
let naiveb = NB.estimate ~opt:{NB.default with bernoulli = true } data in
let sample = [ `shortbread ; `whiskey; `porridge ] in
let result = NB.eval naiveb sample in
let expect =
Expand Down
Loading

0 comments on commit d1edf4b

Please sign in to comment.