discrete_distribution.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_CORE_DISTRIBUTIONS_DISCRETE_DISTRIBUTION_HPP
14 #define MLPACK_CORE_DISTRIBUTIONS_DISCRETE_DISTRIBUTION_HPP
15 
16 #include <mlpack/prereqs.hpp>
17 #include <mlpack/core/util/log.hpp>
19 
20 namespace mlpack {
21 namespace distribution {
22 
46 {
47  public:
52  probabilities(std::vector<arma::vec>(1)){ /* Nothing to do. */ }
53 
62  DiscreteDistribution(const size_t numObservations) :
63  probabilities(std::vector<arma::vec>(1,
64  arma::ones<arma::vec>(numObservations) / numObservations))
65  { /* Nothing to do. */ }
66 
75  DiscreteDistribution(const arma::Col<size_t>& numObservations)
76  {
77  for (size_t i = 0; i < numObservations.n_elem; i++)
78  {
79  const size_t numObs = size_t(numObservations[i]);
80  if (numObs <= 0)
81  {
82  std::ostringstream oss;
83  oss << "number of observations for dimension " << i << " is 0, but "
84  << "must be greater than 0";
85  throw std::invalid_argument(oss.str());
86  }
87  probabilities.push_back(arma::ones<arma::vec>(numObs) / numObs);
88  }
89  }
90 
97  DiscreteDistribution(const std::vector<arma::vec>& probabilities)
98  {
99  for (size_t i = 0; i < probabilities.size(); i++)
100  {
101  arma::vec temp = probabilities[i];
102  double sum = accu(temp);
103  if (sum > 0)
104  this->probabilities.push_back(temp / sum);
105  else
106  {
107  this->probabilities.push_back(arma::ones<arma::vec>(temp.n_elem)
108  / temp.n_elem);
109  }
110  }
111  }
112 
116  size_t Dimensionality() const { return probabilities.size(); }
117 
126  double Probability(const arma::vec& observation) const
127  {
128  double probability = 1.0;
129  // Ensure the observation has the same dimension with the probabilities
130  if (observation.n_elem != probabilities.size())
131  {
132  Log::Fatal << "DiscreteDistribution::Probability(): observation has "
133  << "incorrect dimension " << observation.n_elem << " but should have "
134  << "dimension " << probabilities.size() << "!" << std::endl;
135  }
136 
137  for (size_t dimension = 0; dimension < observation.n_elem; dimension++)
138  {
139  // Adding 0.5 helps ensure that we cast the floating point to a size_t
140  // correctly.
141  const size_t obs = size_t(observation(dimension) + 0.5);
142 
143  // Ensure that the observation is within the bounds.
144  if (obs >= probabilities[dimension].n_elem)
145  {
146  Log::Fatal << "DiscreteDistribution::Probability(): received "
147  << "observation " << obs << "; observation must be in [0, "
148  << probabilities[dimension].n_elem << "] for this distribution."
149  << std::endl;
150  }
151  probability *= probabilities[dimension][obs];
152  }
153 
154  return probability;
155  }
156 
165  double LogProbability(const arma::vec& observation) const
166  {
167  // TODO: consider storing log probabilities instead?
168  return log(Probability(observation));
169  }
170 
178  arma::vec Random() const;
179 
187  void Train(const arma::mat& observations);
188 
198  void Train(const arma::mat& observations,
199  const arma::vec& probabilities);
200 
202  arma::vec& Probabilities(const size_t dim = 0) { return probabilities[dim]; }
204  const arma::vec& Probabilities(const size_t dim = 0) const
205  { return probabilities[dim]; }
206 
210  template<typename Archive>
211  void serialize(Archive& ar, const unsigned int /* version */)
212  {
213  ar & BOOST_SERIALIZATION_NVP(probabilities);
214  }
215 
216  private:
219  std::vector<arma::vec> probabilities;
220 };
221 
222 } // namespace distribution
223 } // namespace mlpack
224 
225 #endif
DiscreteDistribution()
Default constructor, which creates a distribution that has no observations.
.hpp
Definition: add_to_po.hpp:21
A discrete distribution where the only observations are discrete observations.
The core includes that mlpack expects; standard C++ includes and Armadillo.
Definition: prereqs.hpp:55
arma::vec Random() const
Return a randomly generated observation (one-dimensional vector; one observation) according to the probability distribution.
double LogProbability(const arma::vec &observation) const
Return the log probability of the given observation.
const arma::vec & Probabilities(const size_t dim=0) const
Return (read-only) the vector of probabilities for the given dimension.
static MLPACK_EXPORT util::PrefixedOutStream Fatal
Prints fatal messages prefixed with [FATAL], then terminates the program.
Definition: log.hpp:90
void serialize(Archive &ar, const unsigned int)
Serialize the distribution.
size_t Dimensionality() const
Get the dimensionality of the distribution.
DiscreteDistribution(const arma::Col< size_t > &numObservations)
Define the multidimensional discrete distribution as having numObservations possible observations...
void Train(const arma::mat &observations)
Estimate the probability distribution directly from the given observations.
Miscellaneous math random-related routines.
double Probability(const arma::vec &observation) const
Return the probability of the given observation.
arma::vec & Probabilities(const size_t dim=0)
Modify the vector of probabilities for the given dimension.
DiscreteDistribution(const size_t numObservations)
Define the discrete distribution as having numObservations possible observations. ...
DiscreteDistribution(const std::vector< arma::vec > &probabilities)
Define the multidimensional discrete distribution as having the given probabilities for each observation.