/tags/rel-0-1-0/FreeSpeech/NNet/src/FFNet.cc

#include "FFNet.h"
#include <vector>
#include <cfloat>     //FLT_MAX
#include <cmath>      //sqrt
#include <iostream>
#include "ObjectParser.h"

DECLARE_TYPE(FFNet)

//Builds a network from a topology vector (layer sizes, input layer first)
//and one activation function name per layer.
FFNet::FFNet(const Vector<int> &_topo, const vector<string> &functions)
   : topo(_topo)
   , layers(topo.size()-1)
{
   for (int i=0;i<topo.size()-1;i++)
   {
      layers[i] = new FFLayer(topo[i+1], topo[i], functions[i]);
      layers[i]->init(1.0);
   }
}

//Builds a network with default activations: hidden layers use FFLayer's
//default transfer function and the output layer is linear.
FFNet::FFNet(const Vector<int> &_topo)
   : topo(_topo)
   , layers(topo.size()-1)
{
   for (int i=0;i<topo.size()-1;i++)
   {
      if (i==topo.size()-2)
         layers[i] = new FFLayer(topo[i+1], topo[i], "lin");
      else
         layers[i] = new FFLayer(topo[i+1], topo[i]);
      layers[i]->init(1.0);
   }
   //re-initialize the first layer with a wider weight range
   layers[0]->init(5);
}

//Forward pass: propagates the input through all layers and returns a
//pointer to the output layer's activations.
double *FFNet::calc(const double *input)
{
   layers[0]->update(input);
   for (int i=1;i<layers.size();i++)
      layers[i]->update(layers[i-1]->getValue());
   return layers[layers.size()-1]->getValue();
}
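
/* learn() below is one stochastic backpropagation step on a single
 * (input, output) pair, accumulating the update into each layer's
 * temporary weights. In equation form, with t the target, y the
 * activation, f' the stored derivative and x the layer input:
 *
 *    output layer:  delta_i = f'_i * (t_i - y_i)
 *    hidden layer:  delta_i = f'_i * sum_j delta_j * w_ji   (j in layer k+1)
 *    update:        w_ij += alpha * delta_i * x_j           (bias: x_j = 1)
 */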
void FFNet::learn(double *input, double *output, double alpha)
{
   int outputLayer = topo.size()-2;
   calc(input);
   //start with the output layer, towards the input
   for (int k=outputLayer;k>=0;k--)
   {
      FFLayer *currentLayer = layers[k];
      double *previousValue, *currentValue;
      if (k==0)
         previousValue = input;
      else
         previousValue = layers[k-1]->getValue();
      currentValue = currentLayer->getValue();
      int layerSize = topo[k+1];
      int layerInputs = topo[k];
      double *delta = currentLayer->getError();
      for (int i=0;i<layerSize;i++)
      {
         double *w = currentLayer->getTmpWeights(i);
         if (k==outputLayer)
         {
            //output layer: delta is the derivative times the output error
            delta[i] = currentLayer->deriv[i] * (output[i]-currentValue[i]);
         }
         else
         {
            //hidden layer: backpropagate the next layer's deltas
            delta[i] = 0;
            double *outErr = layers[k+1]->getError();
            for (int j=0;j<topo[k+2];j++)
            {
               double *outW = layers[k+1]->getWeights(j);
               delta[i] += outErr[j]*outW[i];
            }
            delta[i] = currentLayer->deriv[i]*delta[i];
         }
         //update the weights, then the bias (stored after the weights)
         for (int j=0;j<layerInputs;j++)
            w[j] += alpha * previousValue[j] * delta[i];
         w[layerInputs] += alpha * delta[i];
      }
   }
}
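
/* train() below is batch training with an adaptive learning rate. Each
 * epoch accumulates backprop steps into temporary weights, then:
 *    - a new best SSE restores the momentum and multiplies alpha by
 *      `increase`;
 *    - an SSE worse than min_error by more than a factor of `errRatio`
 *      zeroes the momentum, multiplies alpha by `decrease` and undoes
 *      the epoch;
 *    - a slightly worse SSE keeps the epoch but multiplies alpha by
 *      sqrt(decrease).
 * Each epoch prints the SSE normalized per sample and per output,
 * followed by the current alpha.
 */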
void FFNet::train(vector<float *> tin, vector<float *> tout, int iter, double learnRate, double mom,
                  double increase, double decrease, double errRatio)
{
   double error;
   double min_error=FLT_MAX;
   double last_error=FLT_MAX;
   double alpha = learnRate;
   double momentum = mom;
   while (iter)
   {
      int i,j;
      for (i=0;i<layers.size();i++)
         layers[i]->copyToTmp();
      //accumulate one backprop step per training pair
      for (i=0;i<tin.size();i++)
      {
         //variable-length arrays are a GCC extension
         double in[topo[0]];
         double out[topo[topo.size()-1]];
         for (j=0;j<topo[0];j++)
            in[j]=tin[i][j];
         for (j=0;j<topo[topo.size()-1];j++)
            out[j]=tout[i][j];
         learn(in, out, alpha);
      }
      for (i=0;i<layers.size();i++)
         layers[i]->copyFromTmp(momentum);
      iter--;
      //compute the sum of squared errors over the whole training set
      double SSE = 0;
      for (i=0;i<tin.size();i++)
      {
         double in[topo[0]];
         double out[topo[topo.size()-1]];
         for (j=0;j<topo[0];j++)
            in[j]=tin[i][j];
         for (j=0;j<topo[topo.size()-1];j++)
            out[j]=tout[i][j];
         double *netOut = calc(in);
         for (j=0;j<topo[topo.size()-1];j++)
            SSE += (netOut[j]-out[j])*(netOut[j]-out[j]);
      }
      if (SSE < min_error)
      {
         //best epoch so far: restore the momentum and speed up
         momentum = mom;
         alpha *= increase;
         error = SSE;
         min_error = error;
      } else if (SSE < last_error)
      {
         error = SSE;
      } else if (SSE/errRatio > min_error)
      {
         //error got significantly worse: undo the epoch and slow down
         cerr << SSE-last_error << endl;
         momentum = 0;
         alpha *= decrease;
         for (i=0;i<layers.size();i++)
            layers[i]->undo();
      } else {
         //error got slightly worse: keep the epoch but slow down a little
         alpha *= sqrt(decrease);
         momentum = 0;
         error = SSE;
      }
      cout << (error/tin.size()/topo[topo.size()-1]) << "\t" << alpha << endl;
      last_error = error;
   }
}
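
/* learnlm() fills, for one training pair, the Jacobian rows
 * d(output_ei)/d(weight) together with the output errors and the running
 * squared error. Layout: one row per network output, one column per
 * weight; columns run from the output layer back towards the input, each
 * neuron contributing its topo[k] input weights followed by its bias.
 */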
void FFNet::learnlm(double *input, double *output, double **jacob, double *err, double &sse)
{
   int outputLayer = topo.size()-2;
   calc(input);
   int woffset1=0;      //start of the current layer's block of columns
   int woffset2=0;      //start of the current neuron's columns
   int prev_offset=0;   //start of the previously processed layer's block
   //start with the output layer, towards the input
   for (int k=outputLayer;k>=0;k--)
   {
      FFLayer *currentLayer = layers[k];
      double *previousValue, *currentValue;
      if (k==0)
         previousValue = input;
      else
         previousValue = layers[k-1]->getValue();
      currentValue = currentLayer->getValue();
      int layerSize = topo[k+1];
      int layerInputs = topo[k];
      if (k==outputLayer)
      {
         woffset2=woffset1;
         for (int ei=0;ei<layerSize;ei++)
         {
            err[ei] = currentValue[ei]-output[ei];
            sse += (currentValue[ei]-output[ei])*(currentValue[ei]-output[ei]);
            for (int wi=0;wi<layerInputs;wi++)
               jacob[ei][woffset2+wi] = currentLayer->deriv[ei]*previousValue[wi];
            jacob[ei][woffset2+layerInputs] = currentLayer->deriv[ei];
            woffset2 += layerInputs+1;
         }
         woffset1 += (layerInputs+1)*layerSize;
      } else {
         woffset2=woffset1;
         for (int ni=0;ni<layerSize;ni++)
         {
            //loop over all network outputs (topo[outputLayer+1] of them)
            for (int ei=0;ei<topo[outputLayer+1];ei++)
            {
               //accumulate this neuron's effect on output ei via the
               //previously filled layer's columns
               double contrib=0;
               for (int t = prev_offset + ni ; t < woffset1 ; t += layerSize+1)
                  contrib += jacob[ei][t];
               for (int wi=0;wi<layerInputs;wi++)
                  jacob[ei][woffset2+wi] = contrib*currentLayer->deriv[ni]*previousValue[wi];
               jacob[ei][woffset2+layerInputs] = contrib*currentLayer->deriv[ni];
            }
            woffset2 += layerInputs+1;
         }
         prev_offset=woffset1;
         woffset1 += (layerInputs+1)*layerSize;
      }
   }
}
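
/* trainlm() drives learnlm() over the data set. Despite the name, the
 * step it applies is a plain fixed-size gradient step delta = -mu * J^T e;
 * a full Levenberg-Marquardt update would instead solve
 *
 *    (J^T J + lambda*I) * delta = -J^T e
 *
 * for delta. The jacob2 buffer below has the right shape
 * (nb_weights x nb_weights) to hold J^T J, but it is allocated and freed
 * without ever being used.
 */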
void FFNet::trainlm(vector<float *> tin, vector<float *> tout, int maxIter)
{
   int nb_outputs = topo[topo.size()-1];
   double **jacob = new double * [nb_outputs];
   int nb_weights=0;
   for (int i=0;i<topo.size()-1;i++)
      nb_weights += (topo[i]+1)*topo[i+1];
   for (int i=0;i<nb_outputs;i++)
      jacob[i] = new double [nb_weights];
   double **jacob2 = new double * [nb_weights];
   for (int i=0;i<nb_weights;i++)
      jacob2[i] = new double [nb_weights];
   for (int iter=0; iter<maxIter; iter++)
   {
      double sse=0;
      //initialize the Jacobian
      for (int i=0;i<nb_outputs;i++)
         for (int j=0;j<nb_weights;j++)
            jacob[i][j]=0;
      //initialize the error
      double err[nb_outputs];
      for (int i=0;i<nb_outputs;i++)
         err[i]=0;
      //initialize the gradient
      double grad[nb_weights];
      for (int i=0;i<nb_weights;i++)
         grad[i]=0;
      //iterate over all data, accumulating the gradient J^T e
      for (int i=0;i<tin.size();i++)
      {
         double in[topo[0]];
         double out[topo[topo.size()-1]];
         for (int j=0;j<topo[0];j++)
            in[j]=tin[i][j];
         for (int j=0;j<topo[topo.size()-1];j++)
            out[j]=tout[i][j];
         learnlm(in, out, jacob, err, sse);
         for (int j=0;j<nb_weights;j++)
            for (int k=0;k<nb_outputs;k++)
               grad[j] += jacob[k][j]*err[k];
      }
      //take a small fixed-size step down the gradient
      double delta[nb_weights];
      for (int i=0;i<nb_weights;i++)
         delta[i] = -.0000001*grad[i];
      //apply the step, walking the weights in the same order as learnlm()
      int offset=0;
      int outputLayer = topo.size()-2;
      for (int k=outputLayer;k>=0;k--)
      {
         FFLayer *currentLayer = layers[k];
         int layerSize = topo[k+1];
         int layerInputs = topo[k];
         for (int ni=0;ni<layerSize;ni++)
         {
            double *w = currentLayer->getWeights(ni);
            for (int wi=0;wi<layerInputs;wi++)
               w[wi] += delta[offset+wi];
            w[layerInputs] += delta[offset+layerInputs];
            offset += layerInputs+1;
         }
      }
      cout << (sse/tin.size()/topo[topo.size()-1]) << endl;
   }
   for (int i=0;i<nb_outputs;i++)
      delete [] jacob[i];
   delete [] jacob;
   for (int i=0;i<nb_weights;i++)
      delete [] jacob2[i];
   delete [] jacob2;
}
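
/* On-stream format written by printOn() and parsed back by readFrom():
 *
 *    <FFNet
 *    <topo ...>
 *    <layers ...>
 *    >
 */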
void FFNet::printOn(ostream &out) const
{
   out << "<FFNet " << endl;
   out << "<topo " << topo << ">" << endl;
   out << "<layers " << layers << ">" << endl;
   out << ">\n";
}

void FFNet::readFrom(istream &in)
{
   string tag;
   while (1)
   {
      char ch;
      in >> ch;
      if (ch == '>') break;
      else if (ch != '<')
         throw new ParsingException("Parse error: '<' expected");
      in >> tag;
      if (tag == "topo")
         in >> topo;
      else if (tag == "layers")
         in >> layers;
      else
         throw new ParsingException("unknown argument: " + tag);
      if (!in) throw new ParsingException("Parse error trying to build " + tag);
      in >> tag;
      if (tag != ">")
         throw new ParsingException("Parse error: '>' expected");
   }
}

istream &operator >> (istream &in, FFNet &net)
{
   if (!isValidType(in, "FFNet")) return in;
   net.readFrom(in);
   return in;
}
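
/* Usage sketch (illustrative only; assumes Vector<int> can be sized and
 * indexed as below, which is not shown in this file):
 *
 *    Vector<int> topo(3);
 *    topo[0]=2; topo[1]=4; topo[2]=1;   //2 inputs, 4 hidden, 1 output
 *    FFNet net(topo);                   //default hidden, linear output
 *    vector<float *> tin, tout;         //fill with training pairs
 *    //1000 epochs, rate .001, momentum .9, grow 1.05, shrink .7, ratio 1.04
 *    net.train(tin, tout, 1000, .001, .9, 1.05, .7, 1.04);
 *    double *y = net.calc(someInput);   //forward pass on a double[2]
 */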