HOG2
LRTAStar.h
Go to the documentation of this file.
1 /*
2  * $Id: LRTAStar.h $
3  * hog2
4  *
5  * Created by Nathan Sturtevant on 10/04/05.
6  * Modified by Nathan Sturtevant on 02/29/20.
7  *
8  * This file is part of HOG2. See https://github.com/nathansttt/hog2 for licensing information.
9  *
10  */
11 
12 #ifndef LRTASTAR_H
13 #define LRTASTAR_H
14 
15 #include "LearningAlgorithm.h"
16 #include "FPUtil.h"
17 #include <deque>
18 #include <vector>
19 #include <unordered_map>
20 
// Per-state learning record kept by LRTAStar: the state itself plus the
// learned *correction* applied on top of the environment's base heuristic.
template <class state>
struct learnedData {
	state theState;            // the state this entry describes
	double theHeuristic = 0.0; // learned additive correction over env->HCost()
	                           // (zero-initialized: a fresh entry adds nothing)
};
26 
27 // This class defines the LRTA* algorithm
28 template <class state, class action, class environment>
29 class LRTAStar : public LearningAlgorithm<state,action,environment> {
30 public:
32  { fAmountLearned = 0.0f; }
33  virtual ~LRTAStar(void) { }
34 
35  void GetPath(environment *env, const state& from, const state& to, std::vector<state> &thePath);
36  void GetPath(environment *, const state& , const state& , std::vector<action> & ) { assert(false); };
37  virtual const char *GetName() { return "LRTAStar"; }
38  void SetHCost(environment *env, const state &where, const state &to, double val)
39  {
40  heur[env->GetStateHash(where)].theHeuristic = val-env->HCost(where, to);
41  heur[env->GetStateHash(where)].theState = where;
42  }
43  double HCost(environment *env, const state &from, const state &to) const
44  {
45  auto val = heur.find(env->GetStateHash(from));
46  if (val != heur.end())
47  {
48  return val->second.theHeuristic+env->HCost(from, to);
49  }
50  return env->HCost(from, to);
51 // if (heur.find(env->GetStateHash(from)) != heur.end())
52 // return heur[env->GetStateHash(from)].theHeuristic+env->HCost(from, to);
53 // return env->HCost(from, to);
54  }
55 
56  virtual uint64_t GetNodesExpanded() const { return nodesExpanded; }
57  virtual uint64_t GetNodesTouched() const { return nodesTouched; }
58  virtual void LogFinalStats(StatCollection *s)
59  {
60  s->AddStat("TotalLearning", GetName(),fAmountLearned);
61  }
62 
63  double GetAmountLearned() { return fAmountLearned; }
64  void OpenGLDraw() const {}
65  void OpenGLDraw(const environment *env) const;
66 private:
67  typedef std::unordered_map<uint64_t, learnedData<state>, Hash64 > LearnedHeuristic;
68 
70  state goal;
73 };
74 
76 template <class state, class action, class environment>
77 void LRTAStar<state, action, environment>::GetPath(environment *env, const state& from, const state& to, std::vector<state> &thePath)
78 {
79  goal = to;
80  thePath.resize(0);
81  if (from==to)
82  return;
83 
84  // Initialization -----------------------------------------------------
85  // Use the benefit rule (i.e., never decrease h[]) ?
86  static const bool benefitRule = true;
87 
88  // Initialize the variables
89  double minF = DBL_MAX;
90  double minF_D = DBL_MAX;
91  double minG = 0;
92  int minC = -1;
93  int minD = -1; // depression avoidance
94  nodesExpanded = 0;
95  nodesTouched = 0;
96  nodesTouched++;
97 
98  // Lookahead --------------------------------------------------------------------
99  // get the current node off the open List
100  nodesExpanded++;
101 
102  // Iterate through all edges coming out of *n
103  std::vector<state> neighbors;
104  env->GetSuccessors(from, neighbors);
105  for (unsigned int x = 0; x < neighbors.size(); x++)
106  {
107  // Generate a child
108  nodesTouched++;
109 
110  // Check if the child's tile is occupied
111  if (env->GetOccupancyInfo() && !env->GetOccupancyInfo()->CanMove(from, neighbors[x]))
112  continue;
113 
114  // Compute the g values
115  double g = env->GCost(from, neighbors[x]);// e->getWeight();
116 
117  // See if the min on this level needs to be updated
118  double h = HCost(env, neighbors[x], to);
119  double f = g + h;
120 
121  if (fless(f, minF))
122  {
123  minF = f;
124  minG = g;
125  minC = (int)x;
126  }
127  // tie break the same as other algorithms
128  if (fequal(f, minF) && fgreater(g, minG))
129  {
130  minF = f;
131  minG = g;
132  minC = (int)x;
133  }
134  if (1)// da
135  {
136  if (fless((h-env->HCost(neighbors[x], to))*10
137  +g+h, minF_D))
138  {
139  minF_D = (h-env->HCost(neighbors[x], to))*10+g+h;
140  minD = (int)x;
141  }
142  //[h(s')-h0(s')]*LARGE + [cost(s,s')+h(s')]
143  }
144  }
145 
146  // If there were no legitimate children (e.g., we are blocked out)
147  // then return an empty path
148  if (minC == -1)
149  return;
150 
151  // Update h ---------------------------------------------------------------------
152 
153  // Compute the new h
154  double newH = minF;
155 
156  // Compute the amount of learning
157  double oldH = HCost(env, from, to);
158  double deltaH = newH - oldH;
159 
160  // The benefit rule disallows us to make updates or account for decreases in h
161  if (benefitRule && fless(deltaH, 0.0))
162  deltaH = 0.0;
163 
164  deltaH = fabs(deltaH); // decreasing h is also learning
165 
166  // update h[from,to]
167  if (fgreater(deltaH,0.0))
168  SetHCost(env, from, to, newH);
169 
170  // Update the amount learned on this trial
171  // We do this with an if to avoid accumulating floating point errors
172  if (fgreater(deltaH,0.0))
173  fAmountLearned += deltaH;
174 
175  // Move -------------------------------------------------------------------------
176  if (1) // daLRTA*
177  thePath.push_back(neighbors[minD]);
178  else
179  thePath.push_back(neighbors[minC]);
180  thePath.push_back(from);
181  return;
182 }
183 
184 template <class state, class action, class environment>
185 void LRTAStar<state, action, environment>::OpenGLDraw(const environment *e) const
186 {
187  double learned = 0;
188  for (typename LearnedHeuristic::const_iterator it = heur.begin(); it != heur.end(); it++)
189  {
190  double thisState = (*it).second.theHeuristic;
191  if (learned < thisState)
192  learned = thisState;
193  }
194  for (typename LearnedHeuristic::const_iterator it = heur.begin(); it != heur.end(); it++)
195  {
196  double r = (*it).second.theHeuristic;
197  if (r > 0)
198  {
199  e->SetColor(0.5+0.5*r/learned, 0, 0, 0.1+0.8*r/learned);
200  e->OpenGLDraw((*it).second.theState);
201  }
202  }
203 }
204 
205 #endif
LRTAStar::GetName
virtual const char * GetName()
Definition: LRTAStar.h:37
LRTAStar::LogFinalStats
virtual void LogFinalStats(StatCollection *s)
Definition: LRTAStar.h:58
LRTAStar::GetNodesTouched
virtual uint64_t GetNodesTouched() const
Definition: LRTAStar.h:57
learnedData::theHeuristic
double theHeuristic
Definition: LRTAStar.h:24
LRTAStar::GetNodesExpanded
virtual uint64_t GetNodesExpanded() const
Definition: LRTAStar.h:56
LearningAlgorithm
Definition: LearningAlgorithm.h:15
FPUtil.h
LRTAStar::LearnedHeuristic
std::unordered_map< uint64_t, learnedData< state >, Hash64 > LearnedHeuristic
Definition: LRTAStar.h:67
LRTAStar
Definition: LRTAStar.h:29
LRTAStar::nodesExpanded
uint64_t nodesExpanded
Definition: LRTAStar.h:72
LRTAStar::goal
state goal
Definition: LRTAStar.h:70
Hash64
Definition: SearchEnvironment.h:23
fless
bool fless(double a, double b)
Definition: FPUtil.h:28
LRTAStar::~LRTAStar
virtual ~LRTAStar(void)
Definition: LRTAStar.h:33
LRTAStar::nodesTouched
uint64_t nodesTouched
Definition: LRTAStar.h:72
fgreater
bool fgreater(double a, double b)
Definition: FPUtil.h:29
learnedData::theState
state theState
Definition: LRTAStar.h:23
LRTAStar::HCost
double HCost(environment *env, const state &from, const state &to) const
Definition: LRTAStar.h:43
StatCollection
The StatCollection class is for collecting stats across different parts of the simulation.
Definition: StatCollection.h:34
LRTAStar::fAmountLearned
double fAmountLearned
Definition: LRTAStar.h:71
LRTAStar::GetPath
void GetPath(environment *, const state &, const state &, std::vector< action > &)
Definition: LRTAStar.h:36
LRTAStar::LRTAStar
LRTAStar()
Definition: LRTAStar.h:31
LearningAlgorithm.h
StatCollection::AddStat
void AddStat(const char *category, const char *owner, double value)
Add a new stat entry for the given category, owner and value.
Definition: StatCollection.cpp:67
LRTAStar::GetAmountLearned
double GetAmountLearned()
Definition: LRTAStar.h:63
LRTAStar::SetHCost
void SetHCost(environment *env, const state &where, const state &to, double val)
Definition: LRTAStar.h:38
LRTAStar::GetPath
void GetPath(environment *env, const state &from, const state &to, std::vector< state > &thePath)
The core routine of LRTAStar – computes at most one-move path.
Definition: LRTAStar.h:77
learnedData
Definition: LRTAStar.h:22
LRTAStar::OpenGLDraw
void OpenGLDraw() const
Definition: LRTAStar.h:64
fequal
bool fequal(double a, double b, double tolerance=TOLERANCE)
Definition: FPUtil.h:32
LRTAStar::heur
LearnedHeuristic heur
Definition: LRTAStar.h:69