HOG2
gLSSLRTAStar.h
Go to the documentation of this file.
1 //
2 // gLSSLRTAStar.h
3 // hog2 glut
4 //
5 // Created by Nathan Sturtevant on 11/10/15.
6 // Copyright © 2015 University of Denver. All rights reserved.
7 //
8 
9 #ifndef gLSSLRTAStar_h
10 #define gLSSLRTAStar_h
11 
12 #include "LearningAlgorithm.h"
13 #include "FPUtil.h"
14 #include <deque>
15 #include <vector>
16 #include <unordered_map>
17 #include "TemplateAStar.h"
18 #include "Timer.h"
19 #include <queue>
20 #include <iostream>
21 
22 //static bool verbose = false;
23 
24 //template <class state>
25 //class borderData {
26 //public:
27 // borderData(const state &s, const double h) :theState(s), heuristic(h) {}
28 // state theState;
29 // double heuristic;
30 //};
31 //
32 //template <class state>
33 //class compareBorderData
34 //{
35 //public:
36 // bool operator() (const borderData<state> &lhs, const borderData<state> &rhs) const
37 // {
38 // return (lhs.heuristic > rhs.heuristic);
39 // }
40 //};
41 //
42 
43 template <class state>
45  glssLearnedData() { dead = false; gCost = DBL_MAX; }
46  state theState;
47  bool dead;
48  double theHeuristic;
49  double gCost;
50 };
51 
52 template <class state, class action, class environment>
53 class gLSSLRTAStar : public LearningAlgorithm<state,action,environment>, public Heuristic<state> {
54 public:
55  gLSSLRTAStar(int nodeLimit = 8)
56  {
57  fAmountLearned = 0.0f;
58  nodeExpansionLimit = nodeLimit;
59  avoidLearning = false;
60  minInitialHeuristic = DBL_MAX;
62  initialHeuristic = true;
63  firstRun = true;
65  }
66  virtual ~gLSSLRTAStar(void) { }
67 
68  void SetAvoidLearning(bool val) { avoidLearning = val; }
69  void GetPath(environment *env, const state& from, const state& to, std::vector<state> &thePath);
70  void GetPath(environment *, const state& , const state& , std::vector<action> & ) { assert(false); };
71  virtual const char *GetName()
72  { static char name[255];
73  sprintf(name, "gLSSLRTAStar(%d)", nodeExpansionLimit); return name; }
74  void SetHCost(environment *env, const state &where, const state &to, double val)
75  {
76  auto loc = heur.find(m_pEnv->GetStateHash(where));
77  if (loc != heur.end())
78  {
79  heur[env->GetStateHash(where)].theHeuristic = val-BaseHCost(env, where, to);
80  }
81  else {
82  heur[env->GetStateHash(where)].theHeuristic = val-BaseHCost(env, where, to);
83  heur[env->GetStateHash(where)].theState = where;
84  heur[env->GetStateHash(where)].gCost = DBL_MAX;
85  }
86  }
87  double HCostLearned(const state &from)
88  {
89  if (heur.find(m_pEnv->GetStateHash(from)) != heur.end())
90  return heur[m_pEnv->GetStateHash(from)].theHeuristic;
91  return 0;
92  }
93  void SetGCost(const state &s, double cost)
94  {
95  if (verbose) std::cout << "Setting g-cost of " << s << " to " << cost << "\n";
96  auto val = heur.find(m_pEnv->GetStateHash(s));
97  if (val != heur.end())
98  {
99  heur[m_pEnv->GetStateHash(s)].gCost = cost;
100  }
101  else {
102  // initialize
103  heur[m_pEnv->GetStateHash(s)].theHeuristic = 0;
104  heur[m_pEnv->GetStateHash(s)].theState = s;
105  heur[m_pEnv->GetStateHash(s)].gCost = cost;
106  }
107  }
108  double GCost(const state &s)
109  {
110  auto val = heur.find(m_pEnv->GetStateHash(s));
111  if (val != heur.end())
112  {
113  return val->second.gCost;
114  }
115  return DBL_MAX;
116  }
117  double HCost(environment *env, const state &from, const state &to) const
118  {
119  auto val = heur.find(env->GetStateHash(from));
120  if (val != heur.end())
121  {
122  return val->second.theHeuristic+env->HCost(from, to);
123  }
124  return BaseHCost(env, from, to);//env->HCost(from, to);
125  // if (heur.find(env->GetStateHash(from)) != heur.end())
126  // return heur[env->GetStateHash(from)].theHeuristic+BaseHCost(env, from, to);
127  // return BaseHCost(env, from, to);
128  }
129  double BaseHCost(environment *env, const state &from, const state &to) const
130  { return initialHeuristicWeight*env->HCost(from, to);
131  }
132  double HCost(const state &from, const state &to) const
133  { return HCost(m_pEnv, from, to); }
134 
136  {
137  double learned = 0;
138  for (typename LearnedHeuristic::const_iterator it = heur.begin(); it != heur.end(); it++)
139  {
140  double thisState = (*it).second.theHeuristic;
141  if (learned < thisState)
142  learned = thisState;
143  }
144  return learned;
145  }
147  { initialHeuristicWeight = val; }
148 
149  virtual uint64_t GetNodesExpanded() const { return nodesExpanded; }
150  virtual uint64_t GetNodesTouched() const { return nodesTouched; }
151  virtual void LogFinalStats(StatCollection *s)
152  {
153  s->AddStat("TotalLearning", GetName(),fAmountLearned);
154  s->AddStat("MinInitial", GetName(),minInitialHeuristic);
155  s->AddStat("MaxLater", GetName(),maxLaterHeuristic);
156  s->AddStat("MaxStateLearning", GetName(), GetMaxStateLearning());
157  }
158 
159  double GetAmountLearned() { return fAmountLearned; }
160  void OpenGLDraw() const {}
161  void OpenGLDraw(const environment *env) const;
162 private:
163  typedef std::unordered_map<uint64_t, glssLearnedData<state>, Hash64 > LearnedHeuristic;
164  typedef std::unordered_map<uint64_t, bool, Hash64 > ClosedList;
165 
166  environment *m_pEnv;
174  std::vector<state> succ;
176  bool firstRun;
179 };
180 
182 template <class state, class action, class environment>
183 void gLSSLRTAStar<state, action, environment>::GetPath(environment *env, const state& from, const state& to, std::vector<state> &thePath)
184 {
185  m_pEnv = env;
186 
187  // only run the first time
188  if (firstRun)
189  {
190  SetGCost(from, 0);
191  firstRun = false;
192  }
193 
194  // This code measures the size of the first heuristic minima that the agent passes over
195  if (initialHeuristic)
196  {
197  double tmp;
198  if ((tmp = BaseHCost(env, from, to)) > minInitialHeuristic)
199  {
200  initialHeuristic = false;
201  maxLaterHeuristic = tmp;
202  }
203  else {
204  minInitialHeuristic = tmp;
205  }
206  }
207  else {
208  maxLaterHeuristic = std::max(BaseHCost(env, from, to), maxLaterHeuristic);
209  }
210  // Done measurement
211 
212  Timer t;
213  t.StartTimer();
214  thePath.resize(0);
215  if (from==to)
216  return;
217 
218  astar.SetHeuristic(this);
219  astar.InitializeSearch(env, from, to, thePath);
220  astar.SetUseBPMX(1);
221  for (int x = 0; x < nodeExpansionLimit; x++)
222  {
223  const state &tmp = astar.CheckNextNode();
224  // update gcosts
225  {
226  env->GetSuccessors(tmp, succ);
227  for (const auto &s : succ)
228  SetGCost(s, std::min(GCost(s), GCost(tmp)+m_pEnv->GCost(tmp, s)));
229  }
230  if (tmp == to)
231  break;
232 
233  astar.DoSingleSearchStep(thePath);
234  // if (astar.DoSingleSearchStep(thePath))
235  // {
236  // // return reversed path
237  // std::reverse(thePath.begin(), thePath.end());
238  // break;
239  // }
240  }
241  nodesExpanded = astar.GetNodesExpanded();
242  nodesTouched = astar.GetNodesTouched();
243  // std::cout << GetName() << " " << nodesExpanded << " expanded by A* with expansion limit " << nodeExpansionLimit << std::endl;
244 
245  if (thePath.size() != 0)
246  return;
247 
248  // 1. put all open nodes in pqueue
249  typedef std::priority_queue<borderData<state>,std::vector<borderData<state> >,compareBorderData<state> > pQueue;
250  pQueue q;
251 
252  double bestF = -1;
253  double bestLearning = -1;
254  state first;
255 
256  unsigned int openSize = astar.GetNumOpenItems();
257  for (unsigned int x = 0; x < openSize; x++)
258  {
259  const auto data = astar.GetOpenItem(x);
260  double currLearning = HCostLearned(data.data);
261  if (avoidLearning)
262  {
263  if ((bestF == -1) ||
264  (currLearning == 0 && bestLearning != 0) ||
265  (currLearning == 0 && bestLearning == 0 && (fless(data.g+data.h, bestF))) ||
266  (currLearning != 0 && bestLearning != 0 && (fless(data.g+data.h, bestF))))
267  {
268  bestLearning = currLearning;
269  bestF = data.g+data.h;
270  first = data.data;
271  }
272  }
273  else {
274  if ((bestF == -1) || (fless(data.g+data.h, bestF)))
275  {
276  bestF = data.g+data.h;
277  first = data.data;
278  }
279  if (fequal(data.g+data.h, bestF))
280  {
281  if (GCost(data.data) > GCost(first))
282  {
283  first = data.data;
284  }
285  }
286  }
287  q.push(borderData<state>(data.data, data.h));
288  if (verbose) std::cout << "Preparing border state: " << data.data << " h: " << data.h << std::endl;
289  }
290 
291  std::vector<state> succ;
292  ClosedList c;
293  while (q.size() > 0)
294  {
295  nodesExpanded++;
296  nodesTouched++;
297  state s = q.top().theState;
298  if (verbose) std::cout << "Starting with " << s << " h: " << q.top().heuristic << "/" << HCost(s, to) << std::endl;
299  q.pop();
300  // std::cout << s << " " << learnData[env->GetStateHash(s)].learnedHeuristic << std::endl;
301  env->GetSuccessors(s, succ);
302  double hCost = HCost(s, to);
303  for (unsigned int x = 0; x < succ.size(); x++)
304  {
305  nodesTouched++;
306  double succHCost;
307  if (!astar.GetClosedListGCost(succ[x], succHCost))
308  {
309  if (verbose) std::cout << succ[x] << " not in closed\n";
310  continue;
311  }
312  double edgeCost = env->GCost(s, succ[x]);
313  if (verbose) std::cout << s << " to " << succ[x] << " " << edgeCost << " ";
314  succHCost = HCost(env, succ[x], to);
315  if (c[env->GetStateHash(succ[x])]) // in closed list, but seen before, update if smaller
316  {
317  if (verbose) std::cout << succ[x] << " updated before ";
318  if (fless(hCost + edgeCost, succHCost))
319  {
320  if (verbose) std::cout << "lowering cost to " << hCost + edgeCost << " from " << succHCost << std::endl;
321  fAmountLearned = fAmountLearned - (succHCost - (hCost+edgeCost));
322  if (verbose) std::cout << " learning now " << fAmountLearned;
323  SetHCost(env, succ[x], to, hCost + edgeCost);
324  q.push(borderData<state>(succ[x], hCost + edgeCost));
325  }
326  if (verbose) std::cout << std::endl;
327  }
328  else { // not expanded before and in closed list, always update
329  // 2. expand successors, checking if they are in closed list and adding to queue
330  // if (fless(succHCost, hCost - edgeCost))
331  if (verbose) std::cout << succ[x] << " NOT updated before ";
332  //if (fgreater(hCost + edgeCost, succHCost))
333  {
334  if (verbose) std::cout << "setting cost to " << hCost + edgeCost << " over " << succHCost;
335  fAmountLearned += (edgeCost + hCost) - succHCost;
336  if (verbose) std::cout << " learning now " << fAmountLearned;
337  SetHCost(env, succ[x], to, hCost + edgeCost);
338  q.push(borderData<state>(succ[x], hCost + edgeCost));
339  c[env->GetStateHash(succ[x])] = true;
340  }
341  if (verbose) std::cout << std::endl;
342  }
343  }
344  }
345  //std::cout << GetName() << " " << nodesExpanded-nodeExpansionLimit << " expanded during learning" << std::endl;
346 
347  // if (thePath.size() != 0)
348  // return;
349 
350  // 3. construct best path
351  astar.ExtractPathToStart(first, thePath);
352  if (verbose) std::cout << "LSS-LRTA* heading towards " << thePath[0] << "with h-value " << HCost(env, thePath[0], to) << std::endl;
353  t.EndTimer();
354  //std::cout << GetName() << "\t" << nodesExpanded << "\t" << t.GetElapsedTime() << "\t" << nodesExpanded/t.GetElapsedTime() << std::endl;
355 }
356 
357 template <class state, class action, class environment>
359 {
360  astar.OpenGLDraw();
361 
362  double learned = 0;
363  for (typename LearnedHeuristic::const_iterator it = heur.begin(); it != heur.end(); it++)
364  {
365  double thisState = (*it).second.theHeuristic;
366 // double thisState = (*it).second.gCost;
367  if (learned < thisState)
368  learned = thisState;
369  }
370  for (typename LearnedHeuristic::const_iterator it = heur.begin(); it != heur.end(); it++)
371  {
372  double r = (*it).second.theHeuristic;
373  //double r = (*it).second.gCost;
374  if (r > 0)
375  {
376  e->SetColor(0.5+0.5*r/learned, 0, 0, 0.1+0.8*r/learned);
377  e->OpenGLDraw((*it).second.theState);
378  }
379  }
380 }
381 
382 #endif /* gLSSLRTAStar_h */
gLSSLRTAStar::fAmountLearned
double fAmountLearned
Definition: gLSSLRTAStar.h:168
glssLearnedData::theHeuristic
double theHeuristic
Definition: gLSSLRTAStar.h:48
gLSSLRTAStar::firstRun
bool firstRun
Definition: gLSSLRTAStar.h:176
gLSSLRTAStar::HCost
double HCost(environment *env, const state &from, const state &to) const
Definition: gLSSLRTAStar.h:117
min
double min(double a, double b)
Definition: FPUtil.h:35
gLSSLRTAStar
Definition: gLSSLRTAStar.h:53
gLSSLRTAStar::GetName
virtual const char * GetName()
Definition: gLSSLRTAStar.h:71
Heuristic
Definition: Heuristic.h:30
gLSSLRTAStar::avoidLearning
bool avoidLearning
Definition: gLSSLRTAStar.h:172
Timer::StartTimer
void StartTimer()
Definition: Timer.cpp:9
glssLearnedData::glssLearnedData
glssLearnedData()
Definition: gLSSLRTAStar.h:45
LearningAlgorithm
Definition: LearningAlgorithm.h:15
gLSSLRTAStar::GetPath
void GetPath(environment *env, const state &from, const state &to, std::vector< state > &thePath)
The core routine of LSSLRTAStar – computes at most one-move path.
Definition: gLSSLRTAStar.h:183
gLSSLRTAStar::OpenGLDraw
void OpenGLDraw() const
Definition: gLSSLRTAStar.h:160
gLSSLRTAStar::HCostLearned
double HCostLearned(const state &from)
Definition: gLSSLRTAStar.h:87
FPUtil.h
glssLearnedData::gCost
double gCost
Definition: gLSSLRTAStar.h:49
glssLearnedData::theState
state theState
Definition: gLSSLRTAStar.h:46
gLSSLRTAStar::initialHeuristic
bool initialHeuristic
Definition: gLSSLRTAStar.h:175
gLSSLRTAStar::GetNodesExpanded
virtual uint64_t GetNodesExpanded() const
Definition: gLSSLRTAStar.h:149
gLSSLRTAStar::SetGCost
void SetGCost(const state &s, double cost)
Definition: gLSSLRTAStar.h:93
gLSSLRTAStar::astar
TemplateAStar< state, action, environment > astar
Definition: gLSSLRTAStar.h:173
gLSSLRTAStar::gLSSLRTAStar
gLSSLRTAStar(int nodeLimit=8)
Definition: gLSSLRTAStar.h:55
gLSSLRTAStar::maxLaterHeuristic
double maxLaterHeuristic
Definition: gLSSLRTAStar.h:178
gLSSLRTAStar::SetInititialHeuristicWeight
void SetInititialHeuristicWeight(double val)
Definition: gLSSLRTAStar.h:146
Timer.h
gLSSLRTAStar::succ
std::vector< state > succ
Definition: gLSSLRTAStar.h:174
gLSSLRTAStar::m_pEnv
environment * m_pEnv
Definition: gLSSLRTAStar.h:166
loc
Definition: MapGenerators.cpp:296
gLSSLRTAStar::LogFinalStats
virtual void LogFinalStats(StatCollection *s)
Definition: gLSSLRTAStar.h:151
gLSSLRTAStar::GetMaxStateLearning
double GetMaxStateLearning()
Definition: gLSSLRTAStar.h:135
Hash64
Definition: SearchEnvironment.h:23
gLSSLRTAStar::minInitialHeuristic
double minInitialHeuristic
Definition: gLSSLRTAStar.h:177
gLSSLRTAStar::nodeExpansionLimit
int nodeExpansionLimit
Definition: gLSSLRTAStar.h:171
verbose
const static int verbose
Definition: ClusterAbstraction.cpp:81
fless
bool fless(double a, double b)
Definition: FPUtil.h:28
openSize
const int openSize
Definition: EnvUtil.h:19
gLSSLRTAStar::SetAvoidLearning
void SetAvoidLearning(bool val)
Definition: gLSSLRTAStar.h:68
gLSSLRTAStar::nodesExpanded
uint64_t nodesExpanded
Definition: gLSSLRTAStar.h:170
borderData
Definition: LSSLRTAStar.h:26
gLSSLRTAStar::GCost
double GCost(const state &s)
Definition: gLSSLRTAStar.h:108
compareBorderData
Definition: LSSLRTAStar.h:34
TemplateAStar< state, action, environment >
TemplateAStar.h
gLSSLRTAStar::SetHCost
void SetHCost(environment *env, const state &where, const state &to, double val)
Definition: gLSSLRTAStar.h:74
max
#define max(a, b)
Definition: MinimalSectorAbstraction.cpp:40
gLSSLRTAStar::GetNodesTouched
virtual uint64_t GetNodesTouched() const
Definition: gLSSLRTAStar.h:150
glssLearnedData::dead
bool dead
Definition: gLSSLRTAStar.h:47
StatCollection
The StatCollection class is for collecting stats across different parts of the simulation.
Definition: StatCollection.h:34
gLSSLRTAStar::~gLSSLRTAStar
virtual ~gLSSLRTAStar(void)
Definition: gLSSLRTAStar.h:66
glssLearnedData
Definition: gLSSLRTAStar.h:44
gLSSLRTAStar::LearnedHeuristic
std::unordered_map< uint64_t, glssLearnedData< state >, Hash64 > LearnedHeuristic
Definition: gLSSLRTAStar.h:163
gLSSLRTAStar::initialHeuristicWeight
double initialHeuristicWeight
Definition: gLSSLRTAStar.h:169
LearningAlgorithm.h
gLSSLRTAStar::heur
LearnedHeuristic heur
Definition: gLSSLRTAStar.h:167
StatCollection::AddStat
void AddStat(const char *category, const char *owner, double value)
Add a new stat entry for the given category, owner and value.
Definition: StatCollection.cpp:67
gLSSLRTAStar::HCost
double HCost(const state &from, const state &to) const
Definition: gLSSLRTAStar.h:132
gLSSLRTAStar::ClosedList
std::unordered_map< uint64_t, bool, Hash64 > ClosedList
Definition: gLSSLRTAStar.h:164
Timer
Definition: Timer.h:19
gLSSLRTAStar::BaseHCost
double BaseHCost(environment *env, const state &from, const state &to) const
Definition: gLSSLRTAStar.h:129
gLSSLRTAStar::nodesTouched
uint64_t nodesTouched
Definition: gLSSLRTAStar.h:170
fequal
bool fequal(double a, double b, double tolerance=TOLERANCE)
Definition: FPUtil.h:32
Timer::EndTimer
double EndTimer()
Definition: Timer.cpp:15
gLSSLRTAStar::GetPath
void GetPath(environment *, const state &, const state &, std::vector< action > &)
Definition: gLSSLRTAStar.h:70
gLSSLRTAStar::GetAmountLearned
double GetAmountLearned()
Definition: gLSSLRTAStar.h:159