本文整理汇总了 C# 中 SparseMatrix.addState 方法的典型用法代码示例。如果您正苦于以下问题：C# SparseMatrix.addState 方法的具体用法？C# SparseMatrix.addState 怎么用？C# SparseMatrix.addState 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 SparseMatrix 的用法示例。
在下文中一共展示了 SparseMatrix.addState 方法的 1 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 C# 代码示例。
示例1: runRL
/// <summary>
/// Test harness for the reinforcement-learning code. Fills a policy table with
/// <c>NStates</c> dialogue states and one action per communicative-act value below
/// <c>COMM_ITEMS</c>, runs a <c>QLambdaLearner</c> over it for a fixed number of
/// episodes, and writes each episode's reward and the policy entries to the console.
/// </summary>
public void runRL()
{
    //CODE BELOW IS ONLY FOR TESTING RL
    int NStates = 5, NActions = (int)COMMUNICATIVE_ACT.COMM_ITEMS, NEpisodes = 50, NIterations = 20;

    // An episode stops early once the last state created below is reached.
    // Derived from NStates so the stop condition follows the state count.
    string terminalUtterance = "state" + (NStates - 1);

    SparseMatrix<DialogueState, DialogueAction> policy = new SparseMatrix<DialogueState, DialogueAction>();

    // Register the states "state0" .. "state{NStates-1}".
    for (int s = 0; s < NStates; s++)
    {
        DialogueState ds = new DialogueState();
        ds.CurrentUtterance = "state" + s;
        policy.addState(ds);
    }

    // Register one action per communicative act; a is always below COMM_ITEMS here,
    // so it maps directly onto an act value without wrapping.
    for (int a = 0; a < NActions; a++)
    {
        DialogueAction da = new DialogueAction();
        da.setVerbalAct(new CommunicativeAct((COMMUNICATIVE_ACT)a));
        policy.addAction(da);
    }

    // Alternative learners that were tried with this harness:
    //QLearner<DialogueState, DialogueAction> RLearner = new QLearner<DialogueState, DialogueAction>(0.95, 0.7, 0.15, policy, rewardFunction);
    //SARSALearner<DialogueState, DialogueAction> RLearner = new SARSALearner<DialogueState, DialogueAction>(0.95, 0.9, 0.15, 0.8, policy, rewardFunction);
    // NOTE(review): the meaning of the four numeric arguments is defined by the
    // QLambdaLearner constructor, which is not visible here.
    QLambdaLearner<DialogueState, DialogueAction> RLearner = new QLambdaLearner<DialogueState, DialogueAction>(0.95, 0.9, 0.15, 0.8, policy, rewardFunction);
    RLearner.setStartState(policy.getStateList()[0]);

    //Iterate
    for (int episode = 0; episode < NEpisodes; episode++)
    {
        // Every episode starts over from the first state.
        DialogueState currState = policy.getStateList()[0];
        int stateIndex = 0;

        for (int iteration = 0; (iteration < NIterations) && (currState.CurrentUtterance != terminalUtterance); iteration++)
        {
            DialogueAction currAct = RLearner.nextAction();

            //Observe new state: only a NO_COMM action moves on to the next state,
            //any other action leaves the current state unchanged.
            if (currAct.getCommunicativeAct().getCommActType() == COMMUNICATIVE_ACT.NO_COMM)
            {
                currState = policy.getStateList()[(++stateIndex) % NStates];
            }

            //Update (and observe reward)
            RLearner.update(currState, currAct);
        }

        // The episode is closed before its reward is queried, as in the original ordering.
        RLearner.newEpisode();
        Console.WriteLine("EPISODE " + episode + ": " + RLearner.getEpisodeReward(episode));
    }

    // Dump every (state, action) key currently stored in the policy.
    Console.WriteLine("\nPolicy items: " + policy.getPolicy().Count + "\n");
    foreach (Tuple<DialogueState, DialogueAction> t in policy.getPolicy().Keys)
    {
        Console.WriteLine("State: " + t.Item1.CurrentUtterance + ", Action: " + t.Item2.getCommunicativeAct().getCommActType() + "\n");
    }
    printPolicy(policy);
}