refined_start.hpp
Go to the documentation of this file.
1 
14 #ifndef MLPACK_METHODS_KMEANS_REFINED_START_HPP
15 #define MLPACK_METHODS_KMEANS_REFINED_START_HPP
16 
17 #include <mlpack/prereqs.hpp>
18 
19 namespace mlpack {
20 namespace kmeans {
21 
38 {
39  public:
/**
 * Create the RefinedStart object, optionally specifying the number of
 * samplings to perform and the percentage of the data used by each
 * subsampling.  The constructor only stores the two values.
 *
 * @param samplings Number of samplings that will be performed (default 100).
 * @param percentage Percentage of the data used by each subsampling
 *     (default 0.02).  NOTE(review): the default suggests this is a fraction
 *     in [0, 1] rather than a value out of 100 -- confirm against the
 *     implementation.
 */
45  RefinedStart(const size_t samplings = 100,
46  const double percentage = 0.02) :
47  samplings(samplings), percentage(percentage) { }
48 
/**
 * Partition the given dataset into the given number of clusters according to
 * the random sampling scheme.  Only the declaration appears here; the
 * definition is presumably in the implementation header included at the end
 * of this file.
 *
 * @tparam MatType Type of the data matrix.
 * @param data Dataset to partition.
 * @param clusters Number of clusters to produce.
 * @param centroids The only non-const argument, so this is where the result
 *     is returned.  NOTE(review): presumably one centroid per column --
 *     confirm against the implementation.
 */
59  template<typename MatType>
60  void Cluster(const MatType& data,
61  const size_t clusters,
62  arma::mat& centroids) const;
63 
/**
 * Overload of Cluster() that reports its result through a row vector of
 * size_t values instead of a centroid matrix.  Only the declaration appears
 * here; the definition is presumably in the implementation header included
 * at the end of this file.
 *
 * @tparam MatType Type of the data matrix.
 * @param data Dataset to partition.
 * @param clusters Number of clusters to produce.
 * @param assignments The only non-const argument, so this is where the
 *     result is returned.  NOTE(review): presumably holds one cluster index
 *     per data point -- confirm against the implementation.
 */
75  template<typename MatType>
76  void Cluster(const MatType& data,
77  const size_t clusters,
78  arma::Row<size_t>& assignments) const;
79 
//! Get the number of samplings that will be performed.
81  size_t Samplings() const { return samplings; }
//! Modify the number of samplings that will be performed (returns a mutable
//! reference to the stored value).
83  size_t& Samplings() { return samplings; }
84 
//! Get the percentage of the data used by each subsampling.
86  double Percentage() const { return percentage; }
//! Modify the percentage of the data used by each subsampling (returns a
//! mutable reference to the stored value).
88  double& Percentage() { return percentage; }
89 
/**
 * Serialize the object.  Both data members, samplings and percentage, are
 * passed to the archive as name-value pairs.
 *
 * @tparam Archive Archive type supplied by the serialization framework.
 * @param ar Archive the members are exchanged with via operator&.
 * @param version Class version number; unused (the parameter is unnamed).
 */
91  template<typename Archive>
92  void serialize(Archive& ar, const unsigned int /* version */)
93  {
94  ar & BOOST_SERIALIZATION_NVP(samplings);
95  ar & BOOST_SERIALIZATION_NVP(percentage);
96  }
97 
98  private:
//! Number of samplings that will be performed.
100  size_t samplings;
//! Percentage of the data used by each subsampling.
102  double percentage;
103 };
104 
105 } // namespace kmeans
106 } // namespace mlpack
107 
108 // Include implementation.
109 #include "refined_start_impl.hpp"
110 
111 #endif
strip_type.hpp
Definition: add_to_po.hpp:21
RefinedStart(const size_t samplings=100, const double percentage=0.02)
Create the RefinedStart object, optionally specifying parameters for the number of samplings to perform and the percentage of the dataset to use in each sampling.
The core includes that mlpack expects; standard C++ includes and Armadillo.
void serialize(Archive &ar, const unsigned int)
Serialize the object.
double & Percentage()
Modify the percentage of the data used by each subsampling.
double Percentage() const
Get the percentage of the data used by each subsampling.
void Cluster(const MatType &data, const size_t clusters, arma::mat &centroids) const
Partition the given dataset into the given number of clusters according to the random sampling scheme outlined in Bradley and Fayyad's paper, and return centroids.
size_t & Samplings()
Modify the number of samplings that will be performed.
A refined approach for choosing initial points for k-means clustering.
size_t Samplings() const
Get the number of samplings that will be performed.