HOG2
daLRTAStar.h
Go to the documentation of this file.
1 //
2 // daLRTAStar.h
3 // hog2 glut
4 //
5 // Created by Nathan Sturtevant on 12/19/11.
6 // Copyright (c) 2011 University of Denver. All rights reserved.
7 //
8 
9 #ifndef hog2_glut_daLRTAStar_h
10 #define hog2_glut_daLRTAStar_h
11 
12 #include "LearningAlgorithm.h"
13 #include "FPUtil.h"
14 #include <deque>
15 #include <vector>
16 #include <unordered_map>
17 #include "TemplateAStar.h"
18 #include "Timer.h"
19 #include <queue>
20 #include <iostream>
21 
22 //static bool verbose = false;
23 namespace DALRTA {
24 
25  static bool verbose = false;
26 
27  template <class state>
28  class borderData {
29  public:
30  borderData(const state &s, const double h) :theState(s), heuristic(h) {}
31  state theState;
32  double heuristic;
33  };
34 
35  template <class state>
37  {
38  public:
39  bool operator() (const borderData<state> &lhs, const borderData<state> &rhs) const
40  {
41  return (lhs.heuristic > rhs.heuristic);
42  }
43  };
44 
45  template <class state>
46  struct lssLearnedData {
48 #ifndef NO_OPENGL
49  state theState;
50 #endif
51  double theHeuristic;
52  };
53 
54  template <class state, class action, class environment>
55  class daLRTAStar : public LearningAlgorithm<state,action,environment>, public Heuristic<state> {
56  public:
57  daLRTAStar(int nodeLimit = 8)
58  { fAmountLearned = 0.0f; nodeExpansionLimit = nodeLimit; m_pEnv = 0; }
59  virtual ~daLRTAStar(void) { }
60 
61  void GetPath(environment *env, const state& from, const state& to, std::vector<state> &thePath);
62 
63  void GetPath(environment *, const state& , const state& , std::vector<action> & ) { assert(false); };
64  virtual const char *GetName()
65  {
66  static char name[255];
67  sprintf(name, "daLRTAStar(%d)", nodeExpansionLimit); return name;
68  }
69  void SetHCost(environment *env, const state &where, const state &to, double val)
70  {
71  heur[env->GetStateHash(where)].theHeuristic = val-env->HCost(where, to);
72 #ifndef NO_OPENGL
73  heur[env->GetStateHash(where)].theState = where;
74 #endif
75  }
76  double HCost(environment *env, const state &from, const state &to) const
77  {
78  auto val = heur.find(env->GetStateHash(from));
79  if (val != heur.end())
80  {
81  return val->second.theHeuristic+env->HCost(from, to);
82  }
83  return env->HCost(from, to);
84 //
85 // if (heur.find(env->GetStateHash(from)) != heur.end())
86 // return heur[env->GetStateHash(from)].theHeuristic+
87 // env->HCost(from, to);
88 // return env->HCost(from, to);
89  }
90  double HCostLearned(environment *env, const state &from)
91  {
92  if (heur.find(env->GetStateHash(from)) != heur.end())
93  return heur[env->GetStateHash(from)].theHeuristic;
94  return 0;
95  }
96  double HCost(const state &from, const state &to) const
97  {
98  assert(m_pEnv != 0);
99  return HCost(m_pEnv, from, to);
100  }
101 
102  virtual uint64_t GetNodesExpanded() const { return nodesExpanded; }
103  virtual uint64_t GetNodesTouched() const { return nodesTouched; }
104  virtual void LogFinalStats(StatCollection *s)
105  { s->AddStat("TotalLearning", GetName(),fAmountLearned); }
106 
107  double GetAmountLearned() { return fAmountLearned; }
108  void OpenGLDraw() const {}
109  void OpenGLDraw(const environment *env) const;
110  private:
111  typedef std::priority_queue<borderData<state>,std::vector<borderData<state> >,compareBorderData<state> > pQueue;
112  typedef std::unordered_map<uint64_t, lssLearnedData<state>, Hash64 > LearnedHeuristic;
113  typedef std::unordered_map<uint64_t, bool, Hash64 > ClosedList;
114 
115 
116  bool ExpandLSS(environment *env, const state &from, const state &to, std::vector<state> &thePath);
117  void BuildLSSQ(environment *env, pQueue &q, state &best, const state &target);
118  void LearnHeuristic(environment *env, pQueue &q, const state &to);
119 
120  environment *m_pEnv;
126  state first;
127  std::vector<state> goals;
128  };
129 
130  const int kFrom = 1;
131  const int kTo = 0;
132 
134  template <class state, class action, class environment>
135  void daLRTAStar<state, action, environment>::GetPath(environment *env, const state& from, const state& to, std::vector<state> &thePath)
136  {
137  Timer t;
138  t.StartTimer();
139  m_pEnv = env;
140  thePath.resize(0);
141  if (from==to)
142  return;
143 
144  if (thePath.size() != 0)
145  return;
146 
147  pQueue q1, q2;
148 
149  bool gotoGoal;
150  gotoGoal = ExpandLSS(env, from, to, thePath);
151 
152  BuildLSSQ(env, q1, first, to);
153  LearnHeuristic(env, q1, to);
154 
155  if (gotoGoal)
156  first = to;
157  // 3. construct best path
158  astar.ExtractPathToStart(first, thePath);
159  t.EndTimer();
160  //std::cout << GetName() << "\t" << nodesExpanded << "\t" << t.GetElapsedTime() << "\t" << nodesExpanded/t.GetElapsedTime() << std::endl;
161  }
162 
163  template <class state, class action, class environment>
164  bool daLRTAStar<state, action, environment>::ExpandLSS(environment *env, const state &from, const state &to, std::vector<state> &thePath)
165  {
166  bool expandedGoal = false;
167  astar.InitializeSearch(env, from, to, thePath);
168  astar.SetHeuristic(this);
169  astar.SetUseBPMX(1);
170  for (int x = 0; x < nodeExpansionLimit; x++)
171  {
172  if (astar.CheckNextNode() == to) // never expand goal
173  {
174  expandedGoal = true;
175  break;
176  }
177  astar.DoSingleSearchStep(thePath);
178  }
179  nodesExpanded = astar.GetNodesExpanded();
180  nodesTouched = astar.GetNodesTouched();
181  return expandedGoal;
182  }
183 
184  template <class state, class action, class environment>
185  void daLRTAStar<state, action, environment>::BuildLSSQ(environment *env, pQueue &q, state &first, const state &target)
186  {
187  // 1. put all open nodes in pqueue
188  double bestF = -1;
189 
190  unsigned int openSize = astar.GetNumOpenItems();
191  for (unsigned int x = 0; x < openSize; x++)
192  {
193  const auto data = astar.GetOpenItem(x);
194  if ((bestF == -1) || (fless(HCostLearned(env,data.data)*10000+(data.g+data.h), bestF)))
195  {
196  bestF = HCostLearned(env,data.data)*10000+(data.g+data.h);
197  first = data.data;
198  }
199  double h = HCost(env, data.data, target);
200  q.push(borderData<state>(data.data, h));
201  if (verbose) std::cout << "Preparing border state: " << data.data << " h: " << h << std::endl;
202  }
203  }
204 
205  template <class state, class action, class environment>
206  void daLRTAStar<state, action, environment>::LearnHeuristic(environment *env, pQueue &q, const state &to)
207  {
208  std::vector<state> succ;
209  ClosedList c;
210  while (q.size() > 0)
211  {
212  nodesExpanded++;
213  nodesTouched++;
214  state s = q.top().theState;
215  if (verbose) std::cout << "Starting with " << s << " h: " << q.top().heuristic << "/" << HCost(env, s, to) << std::endl;
216  q.pop();
217  // std::cout << s << " " << learnData[env->GetStateHash(s)].learnedHeuristic << std::endl;
218  env->GetSuccessors(s, succ);
219  double hCost = HCost(env, s, to);
220  for (unsigned int x = 0; x < succ.size(); x++)
221  {
222  nodesTouched++;
223  double succHCost;
224  if (!astar.GetClosedListGCost(succ[x], succHCost))
225  {
226  if (verbose) std::cout << succ[x] << " not in closed\n";
227  continue;
228  }
229 
230  double edgeCost = env->GCost(s, succ[x]);
231  if (verbose) std::cout << s << " to " << succ[x] << " " << edgeCost << " ";
232  succHCost = HCost(env, succ[x], to);
233  if (c[env->GetStateHash(succ[x])]) // in closed list, but seen before, update if smaller
234  {
235  if (verbose) std::cout << succ[x] << " updated before ";
236  if (fless(hCost + edgeCost, succHCost))
237  {
238  if (verbose) std::cout << "lowering cost to " << hCost + edgeCost << " from " << succHCost << std::endl;
239  fAmountLearned = fAmountLearned - (succHCost - (hCost+edgeCost));
240  if (verbose) std::cout << " learning now " << fAmountLearned;
241  SetHCost(env, succ[x], to, hCost + edgeCost);
242  q.push(borderData<state>(succ[x], hCost + edgeCost));
243  }
244  if (verbose) std::cout << std::endl;
245  }
246  else { // not expanded before and in closed list, always update
247  // 2. expand successors, checking if they are in closed list and adding to queue
248  // if (fless(succHCost, hCost - edgeCost))
249  if (verbose) std::cout << succ[x] << " NOT updated before ";
250  //if (fgreater(hCost + edgeCost, succHCost))
251  {
252  if (verbose) std::cout << "setting cost to " << hCost + edgeCost << " over " << succHCost;
253  fAmountLearned += (edgeCost + hCost) - succHCost;
254  if (verbose) std::cout << " learning now " << fAmountLearned;
255  SetHCost(env, succ[x], to, hCost + edgeCost);
256  q.push(borderData<state>(succ[x], hCost + edgeCost));
257  c[env->GetStateHash(succ[x])] = true;
258  }
259  if (verbose) std::cout << std::endl;
260  }
261  }
262  }
263  }
264 
265 
266  template <class state, class action, class environment>
267  void daLRTAStar<state, action, environment>::OpenGLDraw(const environment *e) const
268  {
269  //astar.OpenGLDraw();
270 
271  unsigned int openSize = astar.GetNumOpenItems();
272  for (unsigned int x = 0; x < openSize; x++)
273  {
274  const auto data = astar.GetOpenItem(x);
275  if (data.data == first)
276  e->SetColor(0, 0.5, 0.5);
277  else
278  e->SetColor(0, 1, 0);
279  e->OpenGLDraw(data.data);
280  }
281 
282  double learned = 1;
283  for (typename LearnedHeuristic::const_iterator it = heur.begin(); it != heur.end(); it++)
284  {
285  double thisState = (*it).second.theHeuristic;
286  if (learned < thisState)
287  learned = thisState;
288 
289  }
290  char str[255];
291  //printf("Max learning: %f/%f\n", learned[0], learned[1]);
292  for (typename LearnedHeuristic::const_iterator it = heur.begin(); it != heur.end(); it++)
293  {
294  double r = (*it).second.theHeuristic;
295  if (r > 0)
296  {
297  sprintf(str, "%3.2f", r);
298  e->SetColor(1,1,1);
299 // e->GLLabelState((*it).second.theState, str);
300  e->SetColor(0.5+0.5*r/learned, 0, 0.0, 0.5+0.5*r/learned);
301 #ifndef NO_OPENGL
302  e->OpenGLDraw((*it).second.theState);
303 #endif
304  }
305  }
306  }
307 
308 }
309 
310 #endif
DALRTA::daLRTAStar::first
state first
Definition: daLRTAStar.h:126
DALRTA::daLRTAStar::LogFinalStats
virtual void LogFinalStats(StatCollection *s)
Definition: daLRTAStar.h:104
DALRTA::daLRTAStar::nodeExpansionLimit
int nodeExpansionLimit
Definition: daLRTAStar.h:124
DALRTA::daLRTAStar::HCostLearned
double HCostLearned(environment *env, const state &from)
Definition: daLRTAStar.h:90
DALRTA::daLRTAStar::HCost
double HCost(environment *env, const state &from, const state &to) const
Definition: daLRTAStar.h:76
DALRTA::compareBorderData
Definition: daLRTAStar.h:36
DALRTA::lssLearnedData::theHeuristic
double theHeuristic
Definition: daLRTAStar.h:51
Heuristic
Definition: Heuristic.h:30
DALRTA::daLRTAStar::fAmountLearned
double fAmountLearned
Definition: daLRTAStar.h:122
Timer::StartTimer
void StartTimer()
Definition: Timer.cpp:9
LearningAlgorithm
Definition: LearningAlgorithm.h:15
DALRTA::daLRTAStar::astar
TemplateAStar< state, action, environment > astar
Definition: daLRTAStar.h:125
DALRTA::daLRTAStar::LearnHeuristic
void LearnHeuristic(environment *env, pQueue &q, const state &to)
Definition: daLRTAStar.h:206
FPUtil.h
DALRTA::daLRTAStar::heur
LearnedHeuristic heur
Definition: daLRTAStar.h:121
DALRTA::daLRTAStar::m_pEnv
environment * m_pEnv
Definition: daLRTAStar.h:120
DALRTA::daLRTAStar::goals
std::vector< state > goals
Definition: daLRTAStar.h:127
DALRTA::borderData::theState
state theState
Definition: daLRTAStar.h:31
DALRTA::borderData::heuristic
double heuristic
Definition: daLRTAStar.h:32
DALRTA::verbose
static bool verbose
Definition: daLRTAStar.h:25
DALRTA::daLRTAStar::GetNodesTouched
virtual uint64_t GetNodesTouched() const
Definition: daLRTAStar.h:103
Timer.h
DALRTA::borderData::borderData
borderData(const state &s, const double h)
Definition: daLRTAStar.h:30
DALRTA::daLRTAStar::BuildLSSQ
void BuildLSSQ(environment *env, pQueue &q, state &best, const state &target)
Definition: daLRTAStar.h:185
DALRTA::daLRTAStar::~daLRTAStar
virtual ~daLRTAStar(void)
Definition: daLRTAStar.h:59
DALRTA::daLRTAStar::ExpandLSS
bool ExpandLSS(environment *env, const state &from, const state &to, std::vector< state > &thePath)
Definition: daLRTAStar.h:164
DALRTA::kTo
const int kTo
Definition: daLRTAStar.h:131
DALRTA::daLRTAStar::GetName
virtual const char * GetName()
Definition: daLRTAStar.h:64
Hash64
Definition: SearchEnvironment.h:23
DALRTA::daLRTAStar::GetPath
void GetPath(environment *, const state &, const state &, std::vector< action > &)
Definition: daLRTAStar.h:63
fless
bool fless(double a, double b)
Definition: FPUtil.h:28
openSize
const int openSize
Definition: EnvUtil.h:19
TemplateAStar< state, action, environment >
DALRTA::daLRTAStar::OpenGLDraw
void OpenGLDraw() const
Definition: daLRTAStar.h:108
DALRTA::lssLearnedData::lssLearnedData
lssLearnedData()
Definition: daLRTAStar.h:47
DALRTA::daLRTAStar::GetPath
void GetPath(environment *env, const state &from, const state &to, std::vector< state > &thePath)
The core routine of FLRTAStar2 – computes at most one-move path.
Definition: daLRTAStar.h:135
DALRTA
Definition: daLRTAStar.h:23
TemplateAStar.h
DALRTA::daLRTAStar::ClosedList
std::unordered_map< uint64_t, bool, Hash64 > ClosedList
Definition: daLRTAStar.h:113
DALRTA::kFrom
const int kFrom
Definition: daLRTAStar.h:130
DALRTA::daLRTAStar::nodesTouched
uint64_t nodesTouched
Definition: daLRTAStar.h:123
DALRTA::daLRTAStar::daLRTAStar
daLRTAStar(int nodeLimit=8)
Definition: daLRTAStar.h:57
DALRTA::daLRTAStar::GetNodesExpanded
virtual uint64_t GetNodesExpanded() const
Definition: daLRTAStar.h:102
DALRTA::daLRTAStar::SetHCost
void SetHCost(environment *env, const state &where, const state &to, double val)
Definition: daLRTAStar.h:69
DALRTA::daLRTAStar::nodesExpanded
uint64_t nodesExpanded
Definition: daLRTAStar.h:123
StatCollection
The StatCollection class is for collecting stats across different parts of the simulation.
Definition: StatCollection.h:34
DALRTA::daLRTAStar::HCost
double HCost(const state &from, const state &to) const
Definition: daLRTAStar.h:96
DALRTA::lssLearnedData::theState
state theState
Definition: daLRTAStar.h:49
DALRTA::daLRTAStar::GetAmountLearned
double GetAmountLearned()
Definition: daLRTAStar.h:107
LearningAlgorithm.h
DALRTA::daLRTAStar
Definition: daLRTAStar.h:55
DALRTA::borderData
Definition: daLRTAStar.h:28
StatCollection::AddStat
void AddStat(const char *category, const char *owner, double value)
Add a new stat entry for the given category, owner and value.
Definition: StatCollection.cpp:67
Timer
Definition: Timer.h:19
DALRTA::daLRTAStar::LearnedHeuristic
std::unordered_map< uint64_t, lssLearnedData< state >, Hash64 > LearnedHeuristic
Definition: daLRTAStar.h:112
DALRTA::lssLearnedData
Definition: daLRTAStar.h:46
Timer::EndTimer
double EndTimer()
Definition: Timer.cpp:15
DALRTA::daLRTAStar::pQueue
std::priority_queue< borderData< state >, std::vector< borderData< state > >, compareBorderData< state > > pQueue
Definition: daLRTAStar.h:111
DALRTA::compareBorderData::operator()
bool operator()(const borderData< state > &lhs, const borderData< state > &rhs) const
Definition: daLRTAStar.h:39