PageRenderTime 45ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/src/plugins/query_capture/tests/ut-query-capture.cpp

https://github.com/sileht/debbot-seeks
C++ | 439 lines | 385 code | 35 blank | 19 comment | 0 complexity | 4d28c5858e058793dd62b5aaa578ed6a MD5 | raw file
  1. /**
  2. * The Seeks proxy and plugin framework are part of the SEEKS project.
  3. * Copyright (C) 2010, 2011 Emmanuel Benazera <ebenazer@seeks-project.info>
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU Affero General Public License as
  7. * published by the Free Software Foundation, either version 3 of the
  8. * License, or (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU Affero General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Affero General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. #define _PCREPOSIX_H // avoid pcreposix.h conflict with regex.h used by gtest
  19. #include <gtest/gtest.h>
  20. #include "query_capture.h"
  21. #include "db_query_record.h"
  22. #include "qprocess.h"
  23. #include "user_db.h"
  24. #include "query_context.h"
  25. #include "seeks_proxy.h"
  26. #include "plugin_manager.h"
  27. #include "proxy_configuration.h"
  28. #include "errlog.h"
  29. using namespace seeks_plugins;
  30. using namespace sp;
  31. using namespace lsh;
  // File name of the throw-away user database; the fixture unlinks it before
  // and after every test.
  const std::string dbfile = "seeks_test.db";

  // Relative directory used to build the config file path and the plugin
  // repository path.
  const std::string basedir = "../../../";

  // Queries submitted to the query capture element by the tests.
  static std::string queries[2] = { "seeks", "seeks project" };

  // Record key used by the store_url_sp test, both to store and to look up
  // the record.
  static std::string keys[1] = { "1645a6897e62417931f26bcbdf4687c9c026b626" };

  // URLs attached to the captured queries.
  static std::string uris[3] = {
    "http://www.seeks-project.info/",
    "http://seeks-project.info/wiki/index.php/Documentation",
    "http://www.seeks-project.info/wiki/index.php/Download"
  };
  49. class QCTest : public testing::Test
  50. {
  51. protected:
  52. QCTest()
  53. {
  54. }
  55. virtual ~QCTest()
  56. {
  57. }
  58. virtual void SetUp()
  59. {
  60. unlink(dbfile.c_str());
  61. seeks_proxy::_configfile = basedir + "config";
  62. seeks_proxy::initialize_mutexes();
  63. errlog::init_log_module();
  64. errlog::set_debug_level(LOG_LEVEL_FATAL | LOG_LEVEL_ERROR | LOG_LEVEL_INFO);
  65. seeks_proxy::_basedir = basedir.c_str();
  66. plugin_manager::_plugin_repository = basedir + "/plugins/";
  67. seeks_proxy::_config = new proxy_configuration(seeks_proxy::_configfile);
  68. //query_capture_configuration *qcc = new query_capture_configuration("");
  69. seeks_proxy::_user_db = new user_db(dbfile);
  70. seeks_proxy::_user_db->open_db();
  71. plugin_manager::load_all_plugins();
  72. plugin_manager::start_plugins();
  73. plugin *pl = plugin_manager::get_plugin("query-capture");
  74. ASSERT_TRUE(NULL!=pl);
  75. qcpl = static_cast<query_capture*>(pl);
  76. ASSERT_TRUE(NULL!=qcpl);
  77. qcelt = qcpl->_qelt;
  78. // check that the db is empty.
  79. ASSERT_TRUE(seeks_proxy::_user_db!=NULL);
  80. ASSERT_EQ(0,seeks_proxy::_user_db->number_records());
  81. }
  82. virtual void TearDown()
  83. {
  84. plugin_manager::close_all_plugins(); // XXX: beware, not very clean.
  85. seeks_proxy::_user_db->close_db();
  86. delete seeks_proxy::_user_db;
  87. delete seeks_proxy::_config;
  88. unlink(dbfile.c_str());
  89. }
  90. query_capture *qcpl;
  91. query_capture_element *qcelt;
  92. };
  93. TEST(QCAPITest,process_url)
  94. {
  95. std::string url = "http://www.seeks-project.info/pr/";
  96. std::string host, path;
  97. query_capture::process_url(url,host,path);
  98. EXPECT_EQ("seeks-project.info",host);
  99. EXPECT_EQ("/pr/",path);
  100. EXPECT_EQ("http://www.seeks-project.info/pr",url);
  101. }
  102. TEST_F(QCTest,store_url_sp)
  103. {
  104. search_snippet sp;
  105. sp.set_url(uris[0]);
  106. std::string title = "The Seeks Project";
  107. sp.set_title(title);
  108. std::string summary = "Seeks Project homepage";
  109. sp.set_summary(summary);
  110. sp.set_lang("en");
  111. DHTKey key = DHTKey::from_rstring(keys[0]);
  112. std::string host;
  113. std::string url = uris[0];
  114. query_capture::process_url(url,host);
  115. uint32_t radius = 0;
  116. try
  117. {
  118. qcelt->store_url(key,queries[0],
  119. url,host,radius,"query-capture",&sp);
  120. }
  121. catch (sp_exception &e)
  122. {
  123. ASSERT_EQ(SP_ERR_OK,e.code()); // would fail.
  124. }
  125. ASSERT_EQ(1,seeks_proxy::_user_db->number_records());
  126. db_record *dbr = seeks_proxy::_user_db->find_dbr(keys[0],"query-capture");
  127. ASSERT_TRUE(dbr!=NULL);
  128. db_query_record *dbqr = dynamic_cast<db_query_record*>(dbr);
  129. ASSERT_TRUE(dbqr!=NULL);
  130. hash_map<const char*,query_data*,hash<const char*>,eqstr>::iterator hit
  131. = dbqr->_related_queries.find(queries[0].c_str());
  132. ASSERT_FALSE(dbqr->_related_queries.end()==hit);
  133. ASSERT_TRUE((*hit).second!=NULL);
  134. ASSERT_EQ(queries[0],(*hit).second->_query);
  135. ASSERT_EQ(0,(*hit).second->_radius);
  136. ASSERT_EQ(1,(*hit).second->_hits);
  137. ASSERT_TRUE((*hit).second->_visited_urls!=NULL);
  138. ASSERT_EQ(1,(*hit).second->_visited_urls->size());
  139. vurl_data *vd = (*(*hit).second->_visited_urls->begin()).second;
  140. ASSERT_TRUE(vd!=NULL);
  141. ASSERT_EQ(url,vd->_url);
  142. ASSERT_EQ(title,vd->_title);
  143. ASSERT_EQ(summary,vd->_summary);
  144. ASSERT_EQ("en",vd->_url_lang);
  145. delete dbqr;
  146. }
  147. TEST_F(QCTest,store_queries)
  148. {
  149. try
  150. {
  151. qcelt->store_queries(queries[0],"query-capture");
  152. }
  153. catch (sp_exception &e)
  154. {
  155. ASSERT_EQ(SP_ERR_OK,e.code()); // would fail.
  156. }
  157. ASSERT_EQ(1,seeks_proxy::_user_db->number_records());
  158. hash_multimap<uint32_t,DHTKey,id_hash_uint> features;
  159. qprocess::generate_query_hashes(queries[0],0,5,features);
  160. ASSERT_EQ(1,features.size());
  161. DHTKey key = (*features.begin()).second;
  162. std::string key_str = key.to_rstring();
  163. db_record *dbr = seeks_proxy::_user_db->find_dbr(key_str,"query-capture");
  164. ASSERT_TRUE(dbr!=NULL);
  165. db_query_record *dbqr = dynamic_cast<db_query_record*>(dbr);
  166. ASSERT_TRUE(dbqr!=NULL);
  167. ASSERT_EQ(1,dbqr->_related_queries.size());
  168. hash_map<const char*,query_data*,hash<const char*>,eqstr>::iterator hit
  169. = dbqr->_related_queries.find(queries[0].c_str());
  170. ASSERT_FALSE(dbqr->_related_queries.end()==hit);
  171. ASSERT_TRUE((*hit).second!=NULL);
  172. ASSERT_EQ(queries[0],(*hit).second->_query);
  173. ASSERT_EQ(0,(*hit).second->_radius);
  174. ASSERT_EQ(1,(*hit).second->_hits);
  175. ASSERT_TRUE((*hit).second->_visited_urls==NULL);
  176. delete dbqr;
  177. }
  178. TEST_F(QCTest,store_queries_url)
  179. {
  180. std::string url = uris[1];
  181. std::string host,path;
  182. query_capture::process_url(url,host,path);
  183. hash_map<const char*,const char*,hash<const char*>,eqstr> *parameters
  184. = new hash_map<const char*,const char*,hash<const char*>,eqstr>();
  185. miscutil::add_map_entry(parameters,"q",1,queries[0].c_str(),1);
  186. std::list<const char*> headers;
  187. query_context qc(parameters,headers);
  188. try
  189. {
  190. qcelt->store_queries(qc._lc_query,&qc,url,host,"query-capture");
  191. }
  192. catch (sp_exception &e)
  193. {
  194. ASSERT_EQ(SP_ERR_OK,e.code()); // would fail.
  195. }
  196. ASSERT_EQ(1,seeks_proxy::_user_db->number_records());
  197. miscutil::free_map(parameters);
  198. hash_multimap<uint32_t,DHTKey,id_hash_uint> features;
  199. qprocess::generate_query_hashes(queries[0],0,5,features);
  200. ASSERT_EQ(1,features.size());
  201. DHTKey key = (*features.begin()).second;
  202. std::string key_str = key.to_rstring();
  203. db_record *dbr = seeks_proxy::_user_db->find_dbr(key_str,"query-capture");
  204. ASSERT_TRUE(dbr!=NULL);
  205. db_query_record *dbqr = dynamic_cast<db_query_record*>(dbr);
  206. ASSERT_TRUE(dbqr!=NULL);
  207. hash_map<const char*,query_data*,hash<const char*>,eqstr>::iterator hit
  208. = dbqr->_related_queries.find(queries[0].c_str());
  209. ASSERT_TRUE((*hit).second->_visited_urls!=NULL);
  210. ASSERT_EQ(2,(*hit).second->_visited_urls->size()); // host and url.
  211. delete dbqr;
  212. }
  213. TEST_F(QCTest,store_queries_url_merge)
  214. {
  215. std::string url = uris[1];
  216. std::string host,path;
  217. query_capture::process_url(url,host,path);
  218. hash_map<const char*,const char*,hash<const char*>,eqstr> *parameters
  219. = new hash_map<const char*,const char*,hash<const char*>,eqstr>();
  220. miscutil::add_map_entry(parameters,"q",1,queries[0].c_str(),1);
  221. std::list<const char*> headers;
  222. query_context qc(parameters,headers);
  223. try
  224. {
  225. qcelt->store_queries(qc._lc_query,&qc,url,host,"query-capture");
  226. }
  227. catch (sp_exception &e)
  228. {
  229. ASSERT_EQ(SP_ERR_OK,e.code()); // would fail.
  230. }
  231. ASSERT_EQ(1,seeks_proxy::_user_db->number_records());
  232. miscutil::free_map(parameters);
  233. std::string url2 = uris[2];
  234. query_capture::process_url(url2,host,path);
  235. try
  236. {
  237. qcelt->store_queries(qc._lc_query,&qc,url2,host,"query-capture");
  238. }
  239. catch (sp_exception &e)
  240. {
  241. ASSERT_EQ(SP_ERR_OK,e.code()); // would fail.
  242. }
  243. ASSERT_EQ(1,seeks_proxy::_user_db->number_records());
  244. hash_multimap<uint32_t,DHTKey,id_hash_uint> features;
  245. qprocess::generate_query_hashes(queries[0],0,5,features);
  246. ASSERT_EQ(1,features.size());
  247. DHTKey key = (*features.begin()).second;
  248. std::string key_str = key.to_rstring();
  249. db_record *dbr = seeks_proxy::_user_db->find_dbr(key_str,"query-capture");
  250. ASSERT_TRUE(dbr!=NULL);
  251. db_query_record *dbqr = dynamic_cast<db_query_record*>(dbr);
  252. ASSERT_TRUE(dbqr!=NULL);
  253. ASSERT_EQ(1,dbqr->_related_queries.size());
  254. hash_map<const char*,query_data*,hash<const char*>,eqstr>::iterator hit
  255. = dbqr->_related_queries.find(queries[0].c_str());
  256. ASSERT_TRUE((*hit).second->_visited_urls!=NULL);
  257. ASSERT_EQ(3,(*hit).second->_visited_urls->size()); // one domain, two urls.
  258. hash_map<const char*,vurl_data*,hash<const char*>,eqstr>::iterator vit
  259. = (*hit).second->_visited_urls->find(host.c_str());
  260. ASSERT_TRUE(vit!=(*hit).second->_visited_urls->end());
  261. ASSERT_EQ(2,(*vit).second->_hits);
  262. delete dbqr;
  263. }
  264. TEST_F(QCTest,remove_url)
  265. {
  266. std::string url = uris[1];
  267. std::string host,path;
  268. query_capture::process_url(url,host,path);
  269. hash_map<const char*,const char*,hash<const char*>,eqstr> *parameters
  270. = new hash_map<const char*,const char*,hash<const char*>,eqstr>();
  271. miscutil::add_map_entry(parameters,"q",1,queries[0].c_str(),1);
  272. std::list<const char*> headers;
  273. query_context qc(parameters,headers);
  274. try
  275. {
  276. qcelt->store_queries(qc._lc_query,&qc,url,host,"query-capture");
  277. }
  278. catch (sp_exception &e)
  279. {
  280. ASSERT_EQ(SP_ERR_OK,e.code()); // would fail.
  281. }
  282. ASSERT_EQ(1,seeks_proxy::_user_db->number_records());
  283. miscutil::free_map(parameters);
  284. hash_multimap<uint32_t,DHTKey,id_hash_uint> features;
  285. qprocess::generate_query_hashes(queries[0],0,5,features);
  286. ASSERT_EQ(1,features.size());
  287. DHTKey key = (*features.begin()).second;
  288. qcelt->remove_url(key,queries[0],url,host,
  289. 1,0,"query-capture");
  290. ASSERT_EQ(1,seeks_proxy::_user_db->number_records());
  291. std::string key_str = key.to_rstring();
  292. db_record *dbr = seeks_proxy::_user_db->find_dbr(key_str,"query-capture");
  293. ASSERT_TRUE(dbr!=NULL);
  294. db_query_record *dbqr = dynamic_cast<db_query_record*>(dbr);
  295. ASSERT_TRUE(dbqr!=NULL);
  296. ASSERT_EQ(1,dbqr->_related_queries.size());
  297. hash_map<const char*,query_data*,hash<const char*>,eqstr>::iterator hit
  298. = dbqr->_related_queries.find(queries[0].c_str());
  299. ASSERT_TRUE((*hit).second->_visited_urls==NULL); // protobuffers read no urls in record.
  300. delete dbqr;
  301. }
  302. TEST_F(QCTest,remove_queries)
  303. {
  304. try
  305. {
  306. qcelt->store_queries(queries[1],"query-capture",5);
  307. }
  308. catch (sp_exception &e)
  309. {
  310. ASSERT_EQ(SP_ERR_OK,e.code()); // would fail.
  311. }
  312. ASSERT_EQ(3,seeks_proxy::_user_db->number_records());
  313. try
  314. {
  315. qcelt->remove_queries(queries[1],"query-capture",5);
  316. }
  317. catch (sp_exception &e)
  318. {
  319. ASSERT_EQ(SP_ERR_OK,e.code()); // would fail.
  320. }
  321. ASSERT_EQ(0,seeks_proxy::_user_db->number_records());
  322. try
  323. {
  324. qcelt->store_queries(queries[1],"query-capture",5);
  325. }
  326. catch (sp_exception &e)
  327. {
  328. ASSERT_EQ(SP_ERR_OK,e.code()); // would fail.
  329. }
  330. ASSERT_EQ(3,seeks_proxy::_user_db->number_records());
  331. try
  332. {
  333. qcelt->remove_queries(queries[1],"query-capture",0);
  334. }
  335. catch (sp_exception &e)
  336. {
  337. ASSERT_EQ(SP_ERR_OK,e.code()); // would fail.
  338. }
  339. ASSERT_EQ(2,seeks_proxy::_user_db->number_records());
  340. bool fail=false;
  341. try
  342. {
  343. qcelt->remove_queries("seeksy","query-capture",0);
  344. }
  345. catch (sp_exception &e)
  346. {
  347. fail = true;
  348. ASSERT_EQ(DB_ERR_NO_REC,e.code());
  349. }
  350. ASSERT_TRUE(fail);
  351. ASSERT_EQ(2,seeks_proxy::_user_db->number_records());
  352. }
  353. TEST(DBRTest,serialize_deserialize)
  354. {
  355. db_query_record *dbr = new db_query_record("query-capture",queries[0],0,
  356. uris[1],1,1,"Seeks Project Documentation",
  357. "Seeks project documentation",0);
  358. std::string msg;
  359. int err = dbr->serialize(msg);
  360. ASSERT_EQ(0,err);
  361. delete dbr;
  362. std::cerr << "msg length: " << msg.length() << std::endl;
  363. dbr = new db_query_record("query-capture","project",0);
  364. err = dbr->deserialize(msg);
  365. ASSERT_EQ(0,err);
  366. delete dbr;
  367. }
  368. TEST(DBRTest,serialize_deserialize_compressed)
  369. {
  370. db_query_record *dbr = new db_query_record("query-capture",queries[0],0,
  371. uris[1],1,1,"Seeks Project Documentation",
  372. "Seeks project documentation",0);
  373. std::string msg;
  374. int err = dbr->serialize_compressed(msg);
  375. ASSERT_EQ(0,err);
  376. delete dbr;
  377. ASSERT_FALSE(msg.empty());
  378. std::cerr << "msg length: " << msg.length() << std::endl;
  379. dbr = new db_query_record("query-capture","project",0);
  380. err = dbr->deserialize_compressed(msg);
  381. ASSERT_EQ(0,err);
  382. delete dbr;
  383. }
  384. TEST(DBRTest,serialize_deserialize_compressed_mix)
  385. {
  386. db_query_record *dbr = new db_query_record("query-capture",queries[0],0,
  387. uris[1],1,1,"Seeks Project Documentation",
  388. "Seeks project documentation",0);
  389. std::string msg;
  390. int err = dbr->serialize(msg);
  391. ASSERT_EQ(0,err);
  392. delete dbr;
  393. ASSERT_FALSE(msg.empty());
  394. std::cerr << "msg length: " << msg.length() << std::endl;
  395. dbr = new db_query_record("query-capture","project",0);
  396. err = dbr->deserialize_compressed(msg); // fails on deserializing compressed msg, falls back on non compressed deserilization.
  397. ASSERT_EQ(0,err);
  398. delete dbr;
  399. }
  400. int main(int argc, char **argv)
  401. {
  402. ::testing::InitGoogleTest(&argc, argv);
  403. return RUN_ALL_TESTS();
  404. }