missing_policy.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_CORE_DATA_MAP_POLICIES_MISSING_POLICY_HPP
13 #define MLPACK_CORE_DATA_MAP_POLICIES_MISSING_POLICY_HPP
14 
15 #include <mlpack/prereqs.hpp>
16 #include <unordered_map>
18 #include <limits>
19 
20 namespace mlpack {
21 namespace data {
22 
31 {
32  public:
33  // typedef of MappedType
34  using MappedType = double;
35 
37  {
38  // Nothing to initialize here.
39  }
40 
48  explicit MissingPolicy(std::set<std::string> missingSet) :
49  missingSet(std::move(missingSet))
50  {
51  // Nothing to initialize here.
52  }
53 
55  static const bool NeedsFirstPass = false;
56 
/**
 * First-pass hook; intentionally a no-op for this policy.
 *
 * Both arguments (the token and the dimension it belongs to) are ignored.
 *
 * @tparam T Numeric type the data will be loaded as; unused here.
 */
template<typename T>
void MapFirstPass(const std::string& /* string */, const size_t /* dim */)
{ /* Intentionally empty. */ }
66 
82  template<typename MapType, typename T>
83  T MapString(const std::string& string,
84  const size_t dimension,
85  MapType& maps,
86  std::vector<Datatype>& /* types */)
87  {
88  static_assert(std::numeric_limits<T>::has_quiet_NaN == true,
89  "Cannot use MissingPolicy with types where has_quiet_NaN() is false!");
90 
91  // If we can load the string then there is no need for mapping.
92  std::stringstream token;
93  token.str(string);
94  T t;
95  token >> t; // Could be sped up by only doing this if we need to.
96 
97  MappedType value = std::numeric_limits<MappedType>::quiet_NaN();
98  // But we can't use that for the map, so we need some other thing that will
99  // represent quiet_NaN().
100  const MappedType mapValue = std::nexttoward(
101  std::numeric_limits<MappedType>::max(), MappedType(0));
102 
103  // If extraction of the value fails, or if it is a value that is supposed to
104  // be mapped, then do mapping.
105  if (token.fail() || !token.eof() ||
106  missingSet.find(string) != std::end(missingSet))
107  {
108  // Everything is mapped to NaN. However we must still keep track of
109  // everything that we have mapped, so we add it to the maps if needed.
110  if (maps.count(dimension) == 0 ||
111  maps[dimension].first.count(string) == 0)
112  {
113  // This string does not exist yet.
114  typedef std::pair<std::string, MappedType> PairType;
115  maps[dimension].first.insert(PairType(string, value));
116 
117  // Insert right mapping too.
118  if (maps[dimension].second.count(mapValue) == 0)
119  {
120  // Create new element in reverse map.
121  maps[dimension].second.insert(std::make_pair(mapValue,
122  std::vector<std::string>()));
123  }
124  maps[dimension].second[mapValue].push_back(string);
125  }
126 
127  return value;
128  }
129  else
130  {
131  // We can just return the value that we read.
132  return t;
133  }
134  }
135 
136  private:
137  // Note that missingSet and maps are different.
138  // missingSet specifies which value/string should be mapped and may be a
139  // superset of 'maps'.
140  std::set<std::string> missingSet;
141 }; // class MissingPolicy
142 
143 } // namespace data
144 } // namespace mlpack
145 
146 #endif
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: add_to_po.hpp:21
The core includes that mlpack expects; standard C++ includes and Armadillo.
Definition: prereqs.hpp:55
T MapString(const std::string &string, const size_t dimension, MapType &maps, std::vector< Datatype > &)
Given the string and the dimension to which it belongs by the user, and the maps and types given by t...
void MapFirstPass(const std::string &, const size_t)
There is nothing for us to do here, but this is required by the MapPolicy type.
static const bool NeedsFirstPass
This doesn't need a first pass over the data to set up.
MissingPolicy(std::set< std::string > missingSet)
Create the MissingPolicy object with the given missingSet.
MissingPolicy is used as a helper class for DatasetMapper.