#ifndef gLSSLRTAStar_h
#define gLSSLRTAStar_h
#include <unordered_map>
#include <algorithm> // std::min / std::max used below
#include <cfloat>    // DBL_MAX
template <class state>
template <class state, class action, class environment>
void GetPath(environment *env, const state& from, const state& to, std::vector<state> &thePath);
void GetPath(environment *, const state &, const state &, std::vector<action> &) { assert(false); } // action-based paths are unsupported
{ static char name[255]; // scratch buffer reused across GetName() calls
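// Record a learned heuristic value for 'where'. Only the increment over the
// base heuristic is stored; HCost() adds the base value back on lookup.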
void SetHCost(environment *env, const state &where, const state &to, double val)
{
	heur[env->GetStateHash(where)].theHeuristic = val - BaseHCost(env, where, to);
	heur[env->GetStateHash(where)].theState = where;
	heur[env->GetStateHash(where)].gCost = DBL_MAX; // reset; refilled by SetGCost during the next lookahead
}
return heur[m_pEnv->GetStateHash(from)].theHeuristic; // HCostLearned: stored increment only
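// SetGCost records the g-cost assigned to a state during the lookahead,
// creating an entry with no learned heuristic for states seen the first time.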
if (verbose) std::cout << "Setting g-cost of " << s << " to " << cost << "\n";
auto val = heur.find(m_pEnv->GetStateHash(s));
if (val != heur.end())
heur[m_pEnv->GetStateHash(s)].theHeuristic = 0; // fresh entry: nothing learned yet
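// GCost returns the g-cost recorded for a state by SetGCost, if any.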
auto val = heur.find(m_pEnv->GetStateHash(s));
if (val != heur.end())
	return val->second.gCost;
double HCost(environment *env, const state &from, const state &to) const
{
	auto val = heur.find(env->GetStateHash(from));
	if (val != heur.end())
		return val->second.theHeuristic + env->HCost(from, to);
	return env->HCost(from, to);
}
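// BaseHCost is the environment's unmodified heuristic, with no learned increment.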
double BaseHCost(environment *env, const state &from, const state &to) const
double HCost(const state &from, const state &to) const
{ return HCost(m_pEnv, from, to); }
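// Find the largest learned increment across all stored entries.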
for (typename LearnedHeuristic::const_iterator it = heur.begin(); it != heur.end(); it++)
{
	double thisState = (*it).second.theHeuristic;
	if (learned < thisState)
		learned = thisState;
}
void OpenGLDraw(const environment *env) const;
typedef std::unordered_map<uint64_t, bool, Hash64> ClosedList; // marks states already raised during the learning sweep
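// GetPath implements one iteration of Koenig & Sun's LSS-LRTA*:
//  1. run a bounded A* lookahead from the agent's current state;
//  2. raise the heuristics of expanded (closed) states via a Dijkstra-style
//     sweep inward from the open-list frontier;
//  3. move toward the most promising frontier state.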
template <class state, class action, class environment>
void LSSLRTAStar<state, action, environment>::GetPath(environment *env, const state& from, const state& to, std::vector<state> &thePath)
{
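	// Presumably statistics for detecting heuristic depressions: track the
	// smallest base heuristic seen while it is still falling, then the largest
	// value seen after it first rises again.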
	if (initialHeuristic)
	{
		double tmp;
		if ((tmp = BaseHCost(env, from, to)) > minInitialHeuristic)
		{
			initialHeuristic = false;
			maxLaterHeuristic = tmp;
		}
		else
			minInitialHeuristic = tmp;
	}
	else
		maxLaterHeuristic = std::max(BaseHCost(env, from, to), maxLaterHeuristic);
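	// Phase 1: bounded A* lookahead of at most nodeExpansionLimit expansions,
	// rooted at the agent's current state.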
	astar.SetHeuristic(this);
	astar.InitializeSearch(env, from, to, thePath);
	for (int x = 0; x < nodeExpansionLimit; x++)
	{
		const state &tmp = astar.CheckNextNode();
		std::vector<state> succ;
		env->GetSuccessors(tmp, succ);
		// Push the cheapest known g-cost to each successor before expanding.
		for (const auto &s : succ)
			SetGCost(s, std::min(GCost(s), GCost(tmp) + m_pEnv->GCost(tmp, s)));
		astar.DoSingleSearchStep(thePath); // expands tmp; fills thePath if the goal is reached
	}
	nodesExpanded = astar.GetNodesExpanded();
	nodesTouched = astar.GetNodesTouched();
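	// If the lookahead already reached the goal, follow the A* path directly;
	// no learning pass is needed.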
	if (thePath.size() != 0)
		return;
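	// Choose the frontier state 'first' to head toward: depending on
	// configuration, either prefer open states whose heuristic was never
	// inflated (breaking ties by lower f), or simply take the lowest
	// f = g + h, breaking f-ties toward the larger g-cost.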
	double bestLearning = -1;
	double bestF = -1;
	state first;
	unsigned int openSize = astar.GetNumOpenItems();
	for (unsigned int x = 0; x < openSize; x++)
	{
		const auto data = astar.GetOpenItem(x);
		double currLearning = HCostLearned(data.data);
		if ((bestLearning == -1) ||
			(currLearning == 0 && bestLearning != 0) ||
			(currLearning == 0 && bestLearning == 0 && fless(data.g + data.h, bestF)) ||
			(currLearning != 0 && bestLearning != 0 && fless(data.g + data.h, bestF)))
		{
			bestLearning = currLearning;
			bestF = data.g + data.h;
			first = data.data;
		}
		if ((bestF == -1) || fless(data.g + data.h, bestF))
		{
			bestF = data.g + data.h;
			first = data.data;
		}
		if (fequal(data.g + data.h, bestF))
		{
			if (GCost(data.data) > GCost(first)) // on equal f, prefer the deeper frontier state
				first = data.data;
		}
		if (verbose) std::cout << "Preparing border state: " << data.data << " h: " << data.h << std::endl;
	}
	std::vector<state> succ;
	ClosedList c;
	while (q.size() > 0)
	{
		state s = q.top().theState;
		if (verbose) std::cout << "Starting with " << s << " h: " << q.top().heuristic << "/" << HCost(s, to) << std::endl;
		q.pop();
		env->GetSuccessors(s, succ);
		double hCost = HCost(s, to);
		for (unsigned int x = 0; x < succ.size(); x++)
		{
			double succHCost;
			if (!astar.GetClosedListGCost(succ[x], succHCost))
			{
				if (verbose) std::cout << succ[x] << " not in closed\n";
				continue; // only states closed by the lookahead get heuristic updates
			}
			double edgeCost = env->GCost(s, succ[x]);
			if (verbose) std::cout << s << " to " << succ[x] << " " << edgeCost << " ";
			succHCost = HCost(env, succ[x], to);
			if (c[env->GetStateHash(succ[x])]) // already updated during this sweep; only allow decreases
			{
				if (verbose) std::cout << succ[x] << " updated before ";
				if (fless(hCost + edgeCost, succHCost))
				{
					if (verbose) std::cout << "lowering cost to " << hCost + edgeCost << " from " << succHCost << std::endl;
					fAmountLearned = fAmountLearned - (succHCost - (hCost + edgeCost));
					if (verbose) std::cout << " learning now " << fAmountLearned;
					SetHCost(env, succ[x], to, hCost + edgeCost);
				}
				if (verbose) std::cout << std::endl;
			}
			else
			{
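				// First visit to this successor during the sweep: raise its
				// heuristic if the value backed up from s is an improvement.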
				if (verbose) std::cout << succ[x] << " NOT updated before ";
				if (fgreater(hCost + edgeCost, succHCost))
				{
					if (verbose) std::cout << "setting cost to " << hCost + edgeCost << " over " << succHCost;
					fAmountLearned += (edgeCost + hCost) - succHCost;
					if (verbose) std::cout << " learning now " << fAmountLearned;
					SetHCost(env, succ[x], to, hCost + edgeCost);
					c[env->GetStateHash(succ[x])] = true; // mark as updated in this sweep
				}
				if (verbose) std::cout << std::endl;
			}
		}
	}
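	// Phase 3: extract the path from the chosen frontier state back to the
	// start and head along it.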
	astar.ExtractPathToStart(first, thePath);
	if (verbose) std::cout << "LSS-LRTA* heading towards " << thePath[0] << " with h-value " << HCost(env, thePath[0], to) << std::endl;
}
template <class state, class action, class environment>
void LSSLRTAStar<state, action, environment>::OpenGLDraw(const environment *e) const
{
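	// Two passes: find the largest learned increment, then shade each updated
	// state red with intensity proportional to how much was learned there.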
	double learned = 0;
	for (typename LearnedHeuristic::const_iterator it = heur.begin(); it != heur.end(); it++)
	{
		double thisState = (*it).second.theHeuristic;
		if (learned < thisState)
			learned = thisState;
	}
	for (typename LearnedHeuristic::const_iterator it = heur.begin(); it != heur.end(); it++)
	{
		double r = (*it).second.theHeuristic;
		if (r > 0) // only shade states whose heuristic was actually raised
		{
			e->SetColor(0.5+0.5*r/learned, 0, 0, 0.1+0.8*r/learned);
			e->OpenGLDraw((*it).second.theState);
		}
	}
}