HOG2
LSSLRTAStar.h
Go to the documentation of this file.
/*
 * LSSLRTAStar.h
 * hog2
 *
 * Created by Nathan Sturtevant on 10/6/09.
 * Copyright 2009 NS Software. All rights reserved.
 *
 */
9 
#ifndef LSSLRTASTAR_H
#define LSSLRTASTAR_H

#include <cassert>
#include <cfloat>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <deque>
#include <iostream>
#include <queue>
#include <unordered_map>
#include <vector>

#include "LearningAlgorithm.h"
#include "FPUtil.h"
#include "TemplateAStar.h"
#include "Timer.h"
22 
// Debug tracing flag for the learning phase below (compile-time edit to enable).
static bool verbose = false;
24 
// A state on the border (open list) of the local search space, paired with the
// heuristic value it should be propagated with during the learning phase.
template <class state>
class borderData {
public:
	borderData(const state &s, const double h) :theState(s), heuristic(h) {}
	state theState;
	double heuristic;
};
32 
33 template <class state>
35 {
36 public:
37  bool operator() (const borderData<state> &lhs, const borderData<state> &rhs) const
38  {
39  return (lhs.heuristic > rhs.heuristic);
40  }
41 };
42 
// Per-state learning record kept in the hash table: the state itself and the
// learned heuristic *delta* above the base heuristic (see SetHCost below).
// NOTE(review): the struct-header line was missing from the extracted source and
// was reconstructed from the documentation index (LSSLRTAStar.h:44).
template <class state>
struct lssLearnedData {
	// theHeuristic is now zero-initialized: operator[] on the learned-heuristic
	// map default-constructs entries, and an uninitialized double would be read
	// if a caller ever queried before assigning.
	lssLearnedData() { dead = false; theHeuristic = 0; }
	state theState;
	bool dead;
	double theHeuristic;
};
50 
51 template <class state, class action, class environment>
52 class LSSLRTAStar : public LearningAlgorithm<state,action,environment>, public Heuristic<state> {
53 public:
54  LSSLRTAStar(int nodeLimit = 8)
55  {
56  fAmountLearned = 0.0f;
57  nodeExpansionLimit = nodeLimit;
58  avoidLearning = false;
59  minInitialHeuristic = DBL_MAX;
61  initialHeuristic = true;
62  randomizeMoves = true;
64  }
65  virtual ~LSSLRTAStar(void) { }
66 
67  void SetAvoidLearning(bool val) { avoidLearning = val; }
68  void GetPath(environment *env, const state& from, const state& to, std::vector<state> &thePath);
69  void GetPath(environment *, const state& , const state& , std::vector<action> & ) { assert(false); };
70  virtual const char *GetName()
71  { static char name[255];
72  if (avoidLearning) sprintf(name, "aLSSLRTAStar(%d)", nodeExpansionLimit);
73  else sprintf(name, "LSSLRTAStar(%d)", nodeExpansionLimit); return name; }
74  void SetHCost(environment *env, const state &where, const state &to, double val)
75  {
76  heur[env->GetStateHash(where)].theHeuristic = val-BaseHCost(env, where, to);
77  heur[env->GetStateHash(where)].theState = where;
78  }
79  double HCostLearned(const state &from)
80  {
81  if (heur.find(m_pEnv->GetStateHash(from)) != heur.end())
82  return heur[m_pEnv->GetStateHash(from)].theHeuristic;
83  return 0;
84  }
85  double HCost(environment *env, const state &from, const state &to) const
86  {
87  auto val = heur.find(env->GetStateHash(from));
88  if (val != heur.end())
89  {
90  return val->second.theHeuristic+env->HCost(from, to);
91  }
92  return BaseHCost(env, from, to);//env->HCost(from, to);
93 // if (heur.find(env->GetStateHash(from)) != heur.end())
94 // return heur[env->GetStateHash(from)].theHeuristic+BaseHCost(env, from, to);
95 // return BaseHCost(env, from, to);
96  }
97  double BaseHCost(environment *env, const state &from, const state &to) const
98  { return initialHeuristicWeight*env->HCost(from, to);
99  }
100  double HCost(const state &from, const state &to) const
101  { return HCost(m_pEnv, from, to); }
102 
104  {
105  double learned = 0;
106  for (typename LearnedHeuristic::const_iterator it = heur.begin(); it != heur.end(); it++)
107  {
108  double thisState = (*it).second.theHeuristic;
109  if (learned < thisState)
110  learned = thisState;
111  }
112  return learned;
113  }
115  { initialHeuristicWeight = val; }
116 
117  virtual uint64_t GetNodesExpanded() const { return nodesExpanded; }
118  virtual uint64_t GetNodesTouched() const { return nodesTouched; }
119  virtual void LogFinalStats(StatCollection *s)
120  {
121  s->AddStat("TotalLearning", GetName(),fAmountLearned);
122  s->AddStat("MinInitial", GetName(),minInitialHeuristic);
123  s->AddStat("MaxLater", GetName(),maxLaterHeuristic);
124  s->AddStat("MaxStateLearning", GetName(), GetMaxStateLearning());
125  }
126 
127  double GetAmountLearned() { return fAmountLearned; }
128  void OpenGLDraw() const {}
129  void OpenGLDraw(const environment *env) const;
130 private:
131  typedef std::unordered_map<uint64_t, lssLearnedData<state>, Hash64 > LearnedHeuristic;
132  typedef std::unordered_map<uint64_t, bool, Hash64 > ClosedList;
133 
134  environment *m_pEnv;
142 
147 };
148 
150 template <class state, class action, class environment>
151 void LSSLRTAStar<state, action, environment>::GetPath(environment *env, const state& from, const state& to, std::vector<state> &thePath)
152 {
153  // This code measures the size of the first heuristic minima that the agent passes over (not well)
154  if (initialHeuristic)
155  {
156  double tmp;
157  if ((tmp = BaseHCost(env, from, to)) > minInitialHeuristic)
158  {
159  initialHeuristic = false;
160  maxLaterHeuristic = tmp;
161  }
162  else {
163  minInitialHeuristic = tmp;
164  }
165  }
166  else {
167  maxLaterHeuristic = std::max(BaseHCost(env, from, to), maxLaterHeuristic);
168  }
169  // Done measurement
170 
171  Timer t;
172  t.StartTimer();
173  m_pEnv = env;
174  thePath.resize(0);
175  if (from==to)
176  return;
177 
178  astar.SetHeuristic(this);
179  astar.InitializeSearch(env, from, to, thePath);
180  astar.SetUseBPMX(1);
181  for (int x = 0; x < nodeExpansionLimit; x++)
182  {
183  if (astar.CheckNextNode() == to)
184  break;
185  astar.DoSingleSearchStep(thePath);
186 // if (astar.DoSingleSearchStep(thePath))
187 // {
188 // // return reversed path
189 // std::reverse(thePath.begin(), thePath.end());
190 // break;
191 // }
192  }
193  nodesExpanded = astar.GetNodesExpanded();
194  nodesTouched = astar.GetNodesTouched();
195 // std::cout << GetName() << " " << nodesExpanded << " expanded by A* with expansion limit " << nodeExpansionLimit << std::endl;
196 
197  if (thePath.size() != 0)
198  return;
199 
200  // 1. put all open nodes in pqueue
201  typedef std::priority_queue<borderData<state>,std::vector<borderData<state> >,compareBorderData<state> > pQueue;
202  pQueue q;
203 
204  double bestF = -1;
205  double bestLearning = -1;
206  state first;
207 
208  unsigned int openSize = astar.GetNumOpenItems();
209  int randCount = 1;
210  for (unsigned int x = 0; x < openSize; x++)
211  {
212  const auto data = astar.GetOpenItem(x);
213  double currLearning = HCostLearned(data.data);
214  if (avoidLearning)
215  {
216  if ((bestF == -1) ||
217  (currLearning == 0 && bestLearning != 0) ||
218  (currLearning == 0 && bestLearning == 0 && (fless(data.g+data.h, bestF))) ||
219  (currLearning != 0 && bestLearning != 0 && (fless(data.g+data.h, bestF))))
220  {
221  bestLearning = currLearning;
222  bestF = data.g+data.h;
223  first = data.data;
224  }
225  }
226  else {
227  if ((bestF == -1) || (fless(data.g+data.h, bestF)))
228  {
229  bestF = data.g+data.h;
230  first = data.data;
231  randCount = 1;
232  }
233  if (randomizeMoves && fequal(data.g+data.h, bestF))
234  {
235  randCount++;
236  if (0 == random()%randCount)
237  {
238  first = data.data;
239  }
240  }
241  }
242  q.push(borderData<state>(data.data, data.h));
243  if (verbose) std::cout << "Preparing border state: " << data.data << " h: " << data.h << std::endl;
244  }
245 
246  std::vector<state> succ;
247  ClosedList c;
248  while (q.size() > 0)
249  {
250  nodesExpanded++;
251  nodesTouched++;
252  state s = q.top().theState;
253  if (verbose) std::cout << "Starting with " << s << " h: " << q.top().heuristic << "/" << HCost(s, to) << std::endl;
254  q.pop();
255  // std::cout << s << " " << learnData[env->GetStateHash(s)].learnedHeuristic << std::endl;
256  env->GetSuccessors(s, succ);
257  double hCost = HCost(s, to);
258  for (unsigned int x = 0; x < succ.size(); x++)
259  {
260  nodesTouched++;
261  double succHCost;
262  if (!astar.GetClosedListGCost(succ[x], succHCost))
263  {
264  if (verbose) std::cout << succ[x] << " not in closed\n";
265  continue;
266  }
267  double edgeCost = env->GCost(s, succ[x]);
268  if (verbose) std::cout << s << " to " << succ[x] << " " << edgeCost << " ";
269  succHCost = HCost(env, succ[x], to);
270  if (c[env->GetStateHash(succ[x])]) // in closed list, but seen before, update if smaller
271  {
272  if (verbose) std::cout << succ[x] << " updated before ";
273  if (fless(hCost + edgeCost, succHCost))
274  {
275  if (verbose) std::cout << "lowering cost to " << hCost + edgeCost << " from " << succHCost << std::endl;
276  fAmountLearned = fAmountLearned - (succHCost - (hCost+edgeCost));
277  if (verbose) std::cout << " learning now " << fAmountLearned;
278  SetHCost(env, succ[x], to, hCost + edgeCost);
279  q.push(borderData<state>(succ[x], hCost + edgeCost));
280  }
281  if (verbose) std::cout << std::endl;
282  }
283  else { // not expanded before and in closed list, always update
284  // 2. expand successors, checking if they are in closed list and adding to queue
285 // if (fless(succHCost, hCost - edgeCost))
286  if (verbose) std::cout << succ[x] << " NOT updated before ";
287  //if (fgreater(hCost + edgeCost, succHCost))
288  {
289  if (verbose) std::cout << "setting cost to " << hCost + edgeCost << " over " << succHCost;
290  fAmountLearned += (edgeCost + hCost) - succHCost;
291  if (verbose) std::cout << " learning now " << fAmountLearned;
292  SetHCost(env, succ[x], to, hCost + edgeCost);
293  q.push(borderData<state>(succ[x], hCost + edgeCost));
294  c[env->GetStateHash(succ[x])] = true;
295  }
296  if (verbose) std::cout << std::endl;
297  }
298  }
299  }
300  //std::cout << GetName() << " " << nodesExpanded-nodeExpansionLimit << " expanded during learning" << std::endl;
301 
302 // if (thePath.size() != 0)
303 // return;
304 
305  // 3. construct best path
306  astar.ExtractPathToStart(first, thePath);
307  if (verbose) std::cout << "LSS-LRTA* heading towards " << thePath[0] << "with h-value " << HCost(env, thePath[0], to) << std::endl;
308  t.EndTimer();
309  //std::cout << GetName() << "\t" << nodesExpanded << "\t" << t.GetElapsedTime() << "\t" << nodesExpanded/t.GetElapsedTime() << std::endl;
310 }
311 
312 template <class state, class action, class environment>
314 {
315  astar.OpenGLDraw();
316 
317  double learned = 0;
318  for (typename LearnedHeuristic::const_iterator it = heur.begin(); it != heur.end(); it++)
319  {
320  double thisState = (*it).second.theHeuristic;
321  if (learned < thisState)
322  learned = thisState;
323  }
324  for (typename LearnedHeuristic::const_iterator it = heur.begin(); it != heur.end(); it++)
325  {
326  double r = (*it).second.theHeuristic;
327  if (r > 0)
328  {
329  e->SetColor(0.5+0.5*r/learned, 0, 0, 0.1+0.8*r/learned);
330  e->OpenGLDraw((*it).second.theState);
331  }
332  }
333 }
334 
335 #endif
LSSLRTAStar::SetAvoidLearning
void SetAvoidLearning(bool val)
Definition: LSSLRTAStar.h:67
LSSLRTAStar::nodesTouched
uint64_t nodesTouched
Definition: LSSLRTAStar.h:138
LSSLRTAStar::LSSLRTAStar
LSSLRTAStar(int nodeLimit=8)
Definition: LSSLRTAStar.h:54
lssLearnedData::theState
state theState
Definition: LSSLRTAStar.h:46
Heuristic
Definition: Heuristic.h:30
LSSLRTAStar::OpenGLDraw
void OpenGLDraw() const
Definition: LSSLRTAStar.h:128
LSSLRTAStar::nodeExpansionLimit
int nodeExpansionLimit
Definition: LSSLRTAStar.h:139
LSSLRTAStar::GetNodesTouched
virtual uint64_t GetNodesTouched() const
Definition: LSSLRTAStar.h:118
LSSLRTAStar::SetInititialHeuristicWeight
void SetInititialHeuristicWeight(double val)
Definition: LSSLRTAStar.h:114
Timer::StartTimer
void StartTimer()
Definition: Timer.cpp:9
LSSLRTAStar::minInitialHeuristic
double minInitialHeuristic
Definition: LSSLRTAStar.h:145
LearningAlgorithm
Definition: LearningAlgorithm.h:15
borderData::borderData
borderData(const state &s, const double h)
Definition: LSSLRTAStar.h:28
LSSLRTAStar::GetAmountLearned
double GetAmountLearned()
Definition: LSSLRTAStar.h:127
FPUtil.h
LSSLRTAStar::m_pEnv
environment * m_pEnv
Definition: LSSLRTAStar.h:134
LSSLRTAStar::maxLaterHeuristic
double maxLaterHeuristic
Definition: LSSLRTAStar.h:146
LSSLRTAStar::SetHCost
void SetHCost(environment *env, const state &where, const state &to, double val)
Definition: LSSLRTAStar.h:74
LSSLRTAStar::avoidLearning
bool avoidLearning
Definition: LSSLRTAStar.h:140
LSSLRTAStar::GetMaxStateLearning
double GetMaxStateLearning()
Definition: LSSLRTAStar.h:103
Timer.h
LSSLRTAStar::GetNodesExpanded
virtual uint64_t GetNodesExpanded() const
Definition: LSSLRTAStar.h:117
LSSLRTAStar::nodesExpanded
uint64_t nodesExpanded
Definition: LSSLRTAStar.h:138
Hash64
Definition: SearchEnvironment.h:23
LSSLRTAStar
Definition: LSSLRTAStar.h:52
fless
bool fless(double a, double b)
Definition: FPUtil.h:28
openSize
const int openSize
Definition: EnvUtil.h:19
borderData
Definition: LSSLRTAStar.h:26
compareBorderData
Definition: LSSLRTAStar.h:34
TemplateAStar< state, action, environment >
LSSLRTAStar::initialHeuristic
bool initialHeuristic
Definition: LSSLRTAStar.h:144
LSSLRTAStar::HCost
double HCost(environment *env, const state &from, const state &to) const
Definition: LSSLRTAStar.h:85
TemplateAStar.h
LSSLRTAStar::GetName
virtual const char * GetName()
Definition: LSSLRTAStar.h:70
lssLearnedData::theHeuristic
double theHeuristic
Definition: LSSLRTAStar.h:48
LSSLRTAStar::GetPath
void GetPath(environment *env, const state &from, const state &to, std::vector< state > &thePath)
The core routine of LSSLRTAStar – computes at most one-move path.
Definition: LSSLRTAStar.h:151
max
#define max(a, b)
Definition: MinimalSectorAbstraction.cpp:40
LSSLRTAStar::~LSSLRTAStar
virtual ~LSSLRTAStar(void)
Definition: LSSLRTAStar.h:65
LSSLRTAStar::randomizeMoves
bool randomizeMoves
Definition: LSSLRTAStar.h:143
borderData::theState
state theState
Definition: LSSLRTAStar.h:29
StatCollection
The StatCollection class is for collecting stats across different parts of the simulation.
Definition: StatCollection.h:34
LSSLRTAStar::initialHeuristicWeight
double initialHeuristicWeight
Definition: LSSLRTAStar.h:137
LearningAlgorithm.h
lssLearnedData::lssLearnedData
lssLearnedData()
Definition: LSSLRTAStar.h:45
LSSLRTAStar::LearnedHeuristic
std::unordered_map< uint64_t, lssLearnedData< state >, Hash64 > LearnedHeuristic
Definition: LSSLRTAStar.h:131
LSSLRTAStar::HCostLearned
double HCostLearned(const state &from)
Definition: LSSLRTAStar.h:79
StatCollection::AddStat
void AddStat(const char *category, const char *owner, double value)
Add a new stat entry for the given category, owner and value.
Definition: StatCollection.cpp:67
LSSLRTAStar::ClosedList
std::unordered_map< uint64_t, bool, Hash64 > ClosedList
Definition: LSSLRTAStar.h:132
Timer
Definition: Timer.h:19
LSSLRTAStar::GetPath
void GetPath(environment *, const state &, const state &, std::vector< action > &)
Definition: LSSLRTAStar.h:69
LSSLRTAStar::HCost
double HCost(const state &from, const state &to) const
Definition: LSSLRTAStar.h:100
lssLearnedData::dead
bool dead
Definition: LSSLRTAStar.h:47
fequal
bool fequal(double a, double b, double tolerance=TOLERANCE)
Definition: FPUtil.h:32
borderData::heuristic
double heuristic
Definition: LSSLRTAStar.h:30
lssLearnedData
Definition: LSSLRTAStar.h:44
LSSLRTAStar::BaseHCost
double BaseHCost(environment *env, const state &from, const state &to) const
Definition: LSSLRTAStar.h:97
Timer::EndTimer
double EndTimer()
Definition: Timer.cpp:15
LSSLRTAStar::heur
LearnedHeuristic heur
Definition: LSSLRTAStar.h:135
LSSLRTAStar::LogFinalStats
virtual void LogFinalStats(StatCollection *s)
Definition: LSSLRTAStar.h:119
compareBorderData::operator()
bool operator()(const borderData< state > &lhs, const borderData< state > &rhs) const
Definition: LSSLRTAStar.h:37
LSSLRTAStar::fAmountLearned
double fAmountLearned
Definition: LSSLRTAStar.h:136
LSSLRTAStar::astar
TemplateAStar< state, action, environment > astar
Definition: LSSLRTAStar.h:141
verbose
static bool verbose
Definition: LSSLRTAStar.h:23