missing_policy.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_CORE_DATA_MAP_POLICIES_MISSING_POLICY_HPP
13 #define MLPACK_CORE_DATA_MAP_POLICIES_MISSING_POLICY_HPP
14 
15 #include <mlpack/prereqs.hpp>
16 #include <unordered_map>
18 #include <limits>
19 
20 namespace mlpack {
21 namespace data {
22 
31 {
32  public:
33  // typedef of MappedType
34  using MappedType = double;
35 
37  {
38  // Nothing to initialize here.
39  }
40 
48  explicit MissingPolicy(std::set<std::string> missingSet) :
49  missingSet(std::move(missingSet))
50  {
51  // Nothing to initialize here.
52  }
53 
//! This policy doesn't need a first pass over the data to set up.
//! Declared constexpr so the flag is usable in constant expressions.
static constexpr bool NeedsFirstPass = false;
56 
/**
 * First-pass hook.  There is nothing for this policy to do here, so both
 * arguments (the token and the dimension) are ignored and the call has no
 * effect.
 *
 * @tparam T Unused by this implementation.
 */
template<typename T>
void MapFirstPass(const std::string& /* string */, const size_t /* dim */) { }
66 
81  template<typename MapType, typename T>
82  T MapString(const std::string& string,
83  const size_t dimension,
84  MapType& maps,
85  std::vector<Datatype>& /* types */)
86  {
87  static_assert(std::numeric_limits<T>::has_quiet_NaN == true,
88  "Cannot use MissingPolicy with types where has_quiet_NaN() is false!");
89 
90  // If we can load the string then there is no need for mapping.
91  std::stringstream token;
92  token.str(string);
93  T t;
94  token >> t; // Could be sped up by only doing this if we need to.
95 
96  MappedType value = std::numeric_limits<MappedType>::quiet_NaN();
97  // But we can't use that for the map, so we need some other thing that will
98  // represent quiet_NaN().
99  const MappedType mapValue = std::nexttoward(
100  std::numeric_limits<MappedType>::max(), MappedType(0));
101 
102  // If extraction of the value fails, or if it is a value that is supposed to
103  // be mapped, then do mapping.
104  if (token.fail() || !token.eof() ||
105  missingSet.find(string) != std::end(missingSet))
106  {
107  // Everything is mapped to NaN. However we must still keep track of
108  // everything that we have mapped, so we add it to the maps if needed.
109  if (maps.count(dimension) == 0 ||
110  maps[dimension].first.count(string) == 0)
111  {
112  // This string does not exist yet.
113  typedef std::pair<std::string, MappedType> PairType;
114  maps[dimension].first.insert(PairType(string, value));
115 
116  // Insert right mapping too.
117  if (maps[dimension].second.count(mapValue) == 0)
118  {
119  // Create new element in reverse map.
120  maps[dimension].second.insert(std::make_pair(mapValue,
121  std::vector<std::string>()));
122  }
123  maps[dimension].second[mapValue].push_back(string);
124  }
125 
126  return value;
127  }
128  else
129  {
130  // We can just return the value that we read.
131  return t;
132  }
133  }
134 
135  private:
136  // Note that missingSet and maps are different.
137  // missingSet specifies which value/string should be mapped and may be a
138  // superset of 'maps'.
139  std::set<std::string> missingSet;
140 }; // class MissingPolicy
141 
142 } // namespace data
143 } // namespace mlpack
144 
145 #endif
strip_type.hpp
Definition: add_to_po.hpp:21
The core includes that mlpack expects; standard C++ includes and Armadillo.
Definition: prereqs.hpp:55
T MapString(const std::string &string, const size_t dimension, MapType &maps, std::vector< Datatype > &)
Given the string and the dimension to which it belongs by the user, and the maps and types given by t...
void MapFirstPass(const std::string &, const size_t)
There is nothing for us to do here, but this is required by the MapPolicy type.
static const bool NeedsFirstPass
This doesn't need a first pass over the data to set up.
MissingPolicy(std::set< std::string > missingSet)
Create the MissingPolicy object with the given missingSet.
MissingPolicy is used as a helper class for DatasetMapper.