PageRenderTime 55ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/Modules/Analytics/Source/Analytics.Web/Module/AnalyticsModule.cs

https://bitbucket.org/instedd/riff
C# | 453 lines | 357 code | 69 blank | 27 comment | 42 complexity | d1fe85ee5889bbf9ffaf71de0ab121ee MD5 | raw file
Possible License(s): LGPL-3.0
  1. using System;
  2. using System.Data;
  3. using System.Configuration;
  4. using System.Linq;
  5. using System.Web;
  6. using System.Web.Security;
  7. using System.Web.UI;
  8. using System.Web.UI.HtmlControls;
  9. using System.Web.UI.WebControls;
  10. using System.Web.UI.WebControls.WebParts;
  11. using System.Xml.Linq;
  12. using System.Collections.Generic;
  13. using Analytics.Web.Models;
  14. using SocializeIt;
  15. using Tagging;
  16. using System.Data.Linq;
  17. using System.IO;
  18. using System.ServiceModel.Syndication;
  19. using System.Transactions;
  20. using Tagging.Web.Data;
  21. using System.Linq.Expressions;
  22. using SocializeIt.Services;
  23. using SocializeIt.Data;
  24. using System.Text;
  25. using SocializeIt.Comments;
  26. using log4net;
  27. using Analytics.SVM;
  28. using Analytics.Filesystem;
  29. using SocializeIt.Security;
  30. namespace Analytics.Web.Module
  31. {
  32. public class AnalyticsModule
  33. {
  // Collaborators supplied through the constructor; nothing in this class
  // reassigns them afterwards, hence readonly.
  private readonly IRepositoryFactory<IRiffRepository> syndicationRepository;
  private readonly IRiffDirectoryService riffDirectoryService;
  private readonly ITaggingModule taggingModule;
  private readonly IRepositoryFactory<ICommentRepository> commentRepository;
  private readonly IAnalyticsFactory analyticsFactory;
  private readonly ISecurableFactory securable;

  private static readonly ILog log = LogManager.GetLogger(typeof(AnalyticsModule));

  /// <summary>
  /// Creates the module and stores each dependency exactly as given
  /// (no argument validation is performed).
  /// </summary>
  public AnalyticsModule(
      IRepositoryFactory<IRiffRepository> syndicationRepository,
      IRiffDirectoryService riffDirectoryService,
      ITaggingModule taggingModule,
      IRepositoryFactory<ICommentRepository> commentRepository,
      IAnalyticsFactory analyticsFactory,
      ISecurableFactory securable)
  {
      this.syndicationRepository = syndicationRepository;
      this.riffDirectoryService = riffDirectoryService;
      this.taggingModule = taggingModule;
      this.commentRepository = commentRepository;
      this.analyticsFactory = analyticsFactory;
      this.securable = securable;
  }
  /// <summary>
  /// Suggests tags for an item by testing its document against every
  /// perceptron stored for the item's riff.
  /// </summary>
  /// <param name="item">Item to classify; its Id and Social.Id are read.</param>
  /// <returns>
  /// At most five suggestions, in decreasing order of confidence. Tags the
  /// item already has, tags with negative feedback recorded for this item,
  /// and tags whose perceptron scores below zero are left out.
  /// </returns>
  public IEnumerable<TagSuggestion> GetSuggestions(Item item)
  {
      var documentId = ToDocumentId(item.Id);
      var result = new List<TagSuggestion>();

      using (var repository = commentRepository.Create())
      {
          // Names of the tags already attached to the item. The entities are
          // materialized first so the name comparison below stays in memory.
          var attachedTags = (from t in taggingModule.GetAllTags(repository as DataContext)
                              where t.ItemTags.Any(x => x.ItemId == item.Id)
                              select t).ToList();
          var attachedTagNames = new HashSet<string>(attachedTags.Select(t => t.Name));

          using (var fileLock = analyticsFactory.GetFileLockFor(item.Social.Id))
          {
              // Load the stored analytics data for this riff, then walk its perceptrons.
              var master = analyticsFactory.Load(item.Social.Id);
              DocumentSpace space = master.DocumentSpace;

              Document doc = space.GetDocument(documentId);
              if (doc == null)
              {
                  // Doc not found, add it just for the suggestion process
                  var comments = repository.ReadCommentsByItem(item.Id).ToList();
                  var reader = GetHtmlTextReader(item, comments);
                  try
                  {
                      doc = space.AddDocument(documentId, reader);
                  }
                  finally
                  {
                      // Release the reader even when AddDocument throws.
                      reader.Dispose();
                  }
              }

              // Tags already rejected for this item; collected once instead of
              // rescanning the feedback list for every perceptron.
              var rejectedTagNames = new HashSet<string>(
                  master.NegativeFeedback
                      .Where(f => f.DocumentId == documentId)
                      .Select(f => f.TagName));

              foreach (var perceptronEntry in master.Perceptrons)
              {
                  var tagName = perceptronEntry.Key;

                  // Skip tags that were rejected or that the item already carries.
                  if (rejectedTagNames.Contains(tagName) || attachedTagNames.Contains(tagName))
                  {
                      continue;
                  }

                  var confidence = perceptronEntry.Value.Test(doc);

                  // Skip suggestions with negative confidence
                  if (confidence < 0)
                  {
                      continue;
                  }

                  result.Add(new TagSuggestion
                  {
                      TagName = tagName,
                      Confidence = confidence // TODO scale?
                  });
              }

              return result.OrderByDescending(t => t.Confidence)
                           .Take(5)
                           .ToList();
          }
      }
  }
  /// <summary>
  /// Finds the items from a classifier's training set that are closest to the
  /// given item.
  /// </summary>
  /// <param name="classifierId">Key passed to GetPerceptron on the riff's analytics master.</param>
  /// <param name="itemId">Id of the item to compare against.</param>
  /// <returns>
  /// Items for up to four training documents with the highest proximity to the
  /// item's document; an empty list when the item has no document in the
  /// document space or no perceptron exists for <paramref name="classifierId"/>.
  /// </returns>
  public List<Item> GetConflictingItems(string classifierId, Guid itemId)
  {
      var riff = riffDirectoryService.ReadSocial(RiffFilters.WithItem(itemId));
      IAnalyticsMaster master = analyticsFactory.Load(riff.Id);
      DocumentSpace space = master.DocumentSpace;
      Perceptron perceptron = master.GetPerceptron(classifierId);
      var thisDoc = space.GetDocument(ToDocumentId(itemId));

      // GetPerceptron can return null (GetTagEvidence checks for it), so treat a
      // missing classifier like a missing document instead of dereferencing it.
      if (thisDoc == null || perceptron == null)
      {
          return new List<Item>();
      }

      // Evaluate the perceptron once; the original ran Test twice.
      var score = perceptron.Test(thisDoc);

      IEnumerable<Document> docs;
      //TODO would be better to know if thisDoc is in negatives or positives.
      // and return the others.
      if (score == 0)
      {
          docs = perceptron.NegativeTrainingSet.Concat(perceptron.PositiveTrainingSet);
      }
      else if (score < 0)
      {
          docs = perceptron.NegativeTrainingSet;
      }
      else
      {
          docs = perceptron.PositiveTrainingSet;
      }

      // take the 4 similar items that are not thisDoc.
      var similarItemsIds = docs.Where(doc => doc.Id != thisDoc.Id)
                                .OrderByDescending(doc => doc.Proximity(thisDoc))
                                .Take(4)
                                .Select(doc => new Guid(doc.Id))
                                .ToList();
      return GetItems(similarItemsIds);
  }
  /// <summary>
  /// Finds the items of the same riff whose documents are closest to the given item.
  /// </summary>
  /// <param name="itemId">Id of the item to compare against.</param>
  /// <returns>
  /// Items for up to four documents with the highest proximity to the item's
  /// document; an empty list when the item has no document in the document space.
  /// </returns>
  public List<Item> GetSimilarItems(Guid itemId)
  {
      var riff = riffDirectoryService.ReadSocial(RiffFilters.WithItem(itemId));
      IAnalyticsMaster master = analyticsFactory.Load(riff.Id);
      DocumentSpace space = master.DocumentSpace;

      var thisDoc = space.GetDocument(ToDocumentId(itemId));
      if (thisDoc == null)
      {
          return new List<Item>();
      }

      // Search the riff's items only after the guard above, so the search is
      // not run when its result would be thrown away.
      var items = GetRiffItems(riff.SocialName);

      // take the 4 similar items that are not thisDoc.
      var similarItemsIds = items.Items.Select(x => x.Id).ToList()
          .Select(id => space.GetDocument(ToDocumentId(id)))
          .Where(doc => doc != null && doc.Id != thisDoc.Id)
          .OrderByDescending(doc => doc.Proximity(thisDoc))
          .Take(4)
          .Select(doc => new Guid(doc.Id))
          .ToList();
      return GetItems(similarItemsIds);
  }
  /// <summary>
  /// Records positive feedback for a suggested tag. For now this amounts to
  /// adding the tag to the item, after the Feedback action has been guarded.
  /// </summary>
  /// <param name="tagName">Tag to add.</param>
  /// <param name="itemId">Item that receives the tag.</param>
  public void PositiveFeedback(string tagName, Guid itemId)
  {
      var owningRiff = riffDirectoryService.ReadSocial(RiffFilters.WithItem(itemId));
      var suggestionSecurable = securable.TagSuggestion(owningRiff, itemId);
      SecurityGuard.GuardAction(suggestionSecurable, Constants.Feedback);

      // NOTE(review): the repository is created and disposed around AddTag but is
      // never handed to it; kept to preserve behavior - confirm whether it is needed.
      using (commentRepository.Create())
      {
          taggingModule.AddTag(itemId, tagName);
      }
  }
  /// <summary>
  /// Records that a suggested tag was rejected for an item. The Feedback action
  /// is guarded first; the report is then made while holding the riff's file lock.
  /// </summary>
  /// <param name="tagName">Tag that was rejected.</param>
  /// <param name="itemId">Item the tag was suggested for.</param>
  public void NegativeFeedback(string tagName, Guid itemId)
  {
      var owningRiff = riffDirectoryService.ReadSocial(RiffFilters.WithItem(itemId));
      var suggestionSecurable = securable.TagSuggestion(owningRiff, itemId);
      SecurityGuard.GuardAction(suggestionSecurable, Constants.Feedback);

      using (analyticsFactory.GetFileLockFor(owningRiff.Id))
      {
          var riffId = owningRiff.Id;
          var rejection = new NegativeFeedback
          {
              DocumentId = itemId.ToString(),
              TagName = tagName,
          };
          analyticsFactory.ReportNegativeFeedback(riffId, rejection);
      }
  }
  /// <summary>
  /// Creates the perceptron for a tag in the new master, brings its training
  /// sets in line with the current evidence, and trains it.
  /// </summary>
  /// <param name="tagName">Tag whose classifier is updated.</param>
  /// <param name="tagsForItem">Tags attached to each item, keyed by item id.</param>
  /// <param name="oldMaster">Previous master; source of earlier negative evidence.</param>
  /// <param name="newMaster">Master being built; supplies the document space and the perceptron.</param>
  private void UpdateClassifier(string tagName, Dictionary<Guid, List<TagOwner>> tagsForItem,
      IAnalyticsMaster oldMaster, IAnalyticsMaster newMaster)
  {
      DocumentSpace space = newMaster.DocumentSpace;
      var positives = new List<Document>();
      var negatives = new List<Document>();
      GetTagEvidence(tagName, tagsForItem, space, ref positives, ref negatives, oldMaster);

      Perceptron classifier = newMaster.CreatePerceptron(tagName);

      // Materialize the four differences before touching the classifier. Except
      // is deferred, so handing the lazy queries to Add*/Remove* would enumerate
      // the training sets while those same sets are being modified.
      var newPositives = positives.Except(classifier.PositiveTrainingSet).ToList();
      var newNegatives = negatives.Except(classifier.NegativeTrainingSet).ToList();
      var positivesToRemove = classifier.PositiveTrainingSet.Except(positives).ToList();
      var negativesToRemove = classifier.NegativeTrainingSet.Except(negatives).ToList();

      classifier.AddPositives(newPositives);
      classifier.AddNegatives(newNegatives);
      classifier.RemovePositives(positivesToRemove);
      classifier.RemoveNegatives(negativesToRemove);

      classifier.Train(positives, negatives);
  }
  /// <summary>
  /// Collects the positive and negative training documents for a tag.
  /// </summary>
  /// <remarks>
  /// Positives are the documents of items that carry the tag. Negatives are the
  /// documents of the remaining items in <paramref name="tagsForItem"/>, the
  /// documents named by negative feedback for the tag, and the old perceptron's
  /// negative documents that are not positive now. Documents that
  /// <paramref name="space"/> does not contain (GetDocument returns null) are
  /// skipped, so feedback or old training data about vanished items cannot put
  /// a null entry into the lists.
  /// </remarks>
  /// <param name="tagName">Tag the evidence is gathered for.</param>
  /// <param name="tagsForItem">Tags attached to each item, keyed by item id.</param>
  /// <param name="space">Document space the returned documents are taken from.</param>
  /// <param name="positives">List that receives the positive documents.</param>
  /// <param name="negatives">List that receives the negative documents.</param>
  /// <param name="oldMaster">Previous master; source of negative feedback and of the old perceptron.</param>
  private void GetTagEvidence(string tagName, Dictionary<Guid, List<TagOwner>> tagsForItem,
      DocumentSpace space, ref List<Document> positives, ref List<Document> negatives,
      IAnalyticsMaster oldMaster)
  {
      IEnumerable<NegativeFeedback> negativeFeedback = oldMaster.NegativeFeedback;

      foreach (var entry in tagsForItem)
      {
          var doc = space.GetDocument(ToDocumentId(entry.Key));
          if (doc == null)
          {
              continue;
          }

          if (entry.Value.Any(t => t.Tag == tagName))
          {
              // The document has the tag, so it's positive
              positives.Add(doc);
          }
          else
          {
              // The document doesn't have the tag, so it's negative
              negatives.Add(doc);
          }
      }

      // Id sets replace the repeated linear scans of the lists. The positives do
      // not change past this point; negativeIds is kept in sync as documents are added.
      var positiveIds = new HashSet<string>(positives.Where(d => d != null).Select(d => d.Id));
      var negativeIds = new HashSet<string>(negatives.Where(d => d != null).Select(d => d.Id));

      // Add the negative feedback previously provided by the users
      foreach (var feedback in negativeFeedback.Where(f => f.TagName == tagName))
      {
          // Only if the negative feedback is not already present
          if (negativeIds.Contains(feedback.DocumentId))
          {
              continue;
          }

          var doc = space.GetDocument(feedback.DocumentId);
          if (doc == null)
          {
              // Feedback about a document that is not in this space.
              continue;
          }

          negatives.Add(doc);
          negativeIds.Add(doc.Id);
      }

      // Also add the old perceptron's negative documents for this tagName, only
      // if they are not in the positives now
      var oldPerceptron = oldMaster.GetPerceptron(tagName);
      if (oldPerceptron == null)
      {
          return;
      }

      foreach (var oldNegativeDoc in oldPerceptron.NegativeTrainingSet)
      {
          if (positiveIds.Contains(oldNegativeDoc.Id) || negativeIds.Contains(oldNegativeDoc.Id))
          {
              continue;
          }

          // Must add the new doc, not the old one
          var doc = space.GetDocument(oldNegativeDoc.Id);
          if (doc == null)
          {
              continue;
          }

          negatives.Add(doc);
          negativeIds.Add(doc.Id);
      }
  }
  /// <summary>
  /// Converts an item id into the string key used for documents.
  /// </summary>
  /// <param name="itemId">Item id.</param>
  /// <returns>The id in the default hyphenated ("D") Guid format.</returns>
  private string ToDocumentId(Guid itemId)
  {
      // "D" is the format the parameterless Guid.ToString() produces.
      string documentId = itemId.ToString("D");
      return documentId;
  }
  /// <summary>
  /// Builds a classifier id by joining the riff name and the tag name with "+".
  /// </summary>
  /// <param name="riffName">Riff name; null contributes nothing to the result.</param>
  /// <param name="tagName">Tag name; null contributes nothing to the result.</param>
  /// <returns>The combined id, e.g. "riff+tag".</returns>
  private static string TagClassifierId(string riffName, string tagName)
  {
      const string separator = "+";
      return string.Concat(riffName, separator, tagName);
  }
  /// <summary>
  /// Searches the items of a riff through the current social system, using a
  /// render context created for a fixed placeholder address and the riff name.
  /// </summary>
  /// <param name="riffName">Name of the riff to search.</param>
  /// <returns>The search result returned by SearchItems.</returns>
  private SearchResult GetRiffItems(string riffName)
  {
      //TODO change this, ShowHide module may hide items
      var socialSystem = SocialSystem.Current;
      var placeholderAddress = new Uri("http://host/space");
      var searchContext = RenderContext.CreateContext(placeholderAddress, riffName);
      return socialSystem.SearchItems(searchContext);
  }
  /// <summary>
  /// Returns the classifiers of a riff without applying any tag filter.
  /// </summary>
  /// <param name="riffName">Name of the riff.</param>
  public IEnumerable<Classifier> GetClassifiers(string riffName)
  {
      // A null filter makes the filtered overload return every perceptron.
      Expression<Func<Tag, bool>> noFilter = null;
      return GetClassifiers(riffName, noFilter);
  }
  /// <summary>
  /// Returns the classifier of a riff that belongs to the tag with the given name.
  /// </summary>
  /// <param name="riffName">Name of the riff.</param>
  /// <param name="tagName">Name of the tag.</param>
  /// <returns>
  /// The matching classifier, or null when there is none. SingleOrDefault
  /// throws InvalidOperationException when more than one classifier matches.
  /// </returns>
  public Classifier GetClassifier(string riffName, string tagName)
  {
      Expression<Func<Tag, bool>> hasRequestedName = tag => tag.Name == tagName;
      IEnumerable<Classifier> matches = GetClassifiers(riffName, hasRequestedName);
      return matches.SingleOrDefault();
  }
  /// <summary>
  /// Returns the classifiers (tag name plus perceptron) stored for a riff,
  /// optionally restricted to the tags accepted by a filter.
  /// </summary>
  /// <param name="riffName">Name of the riff.</param>
  /// <param name="tagsFilter">
  /// Predicate applied to the Tag table; when null, every perceptron is returned
  /// and the tag table is not consulted.
  /// </param>
  /// <returns>A materialized list with one Classifier per selected perceptron.</returns>
  private IEnumerable<Classifier> GetClassifiers(string riffName, Expression<Func<Tag,bool>> tagsFilter)
  {
      var riff = riffDirectoryService.ReadSocial(RiffFilters.ByName(riffName));
      IAnalyticsMaster master = analyticsFactory.Load(riff.Id);
      IEnumerable<KeyValuePair<string, Perceptron>> perceptrons = master.Perceptrons;

      if (tagsFilter != null)
      {
          // Fetch the accepted tag names with one query instead of issuing a
          // query per perceptron.
          // NOTE(review): names are now matched in memory (ordinal), as in
          // GetSuggestions, rather than by the data source's comparison rules -
          // confirm no caller relies on case-insensitive matching.
          HashSet<string> acceptedNames;
          using (var repo = commentRepository.Create())
          {
              var tags = (repo as DataContext).GetTable<Tag>().Where(tagsFilter);
              acceptedNames = new HashSet<string>(tags.Select(t => t.Name));
          }
          perceptrons = perceptrons.Where(p => acceptedNames.Contains(p.Key));
      }

      return perceptrons.Select(x =>
          new Classifier
          {
              TagName = x.Key,
              Perceptron = x.Value,
          }).ToList();
  }
  /// <summary>
  /// Builds the text of an item and wraps it in an HtmlTextReader.
  /// </summary>
  /// <remarks>
  /// The text is the title (appended twice), the summary and the content,
  /// followed by the text of every comment, all separated by single spaces.
  /// </remarks>
  /// <param name="item">Item whose Title, Summary and Content are read.</param>
  /// <param name="comments">Comments to append; may be null.</param>
  /// <returns>A new reader over the combined text; the caller disposes it.</returns>
  private HtmlTextReader GetHtmlTextReader(Item item, List<Comment> comments)
  {
      var text = new StringBuilder();

      // NOTE(review): the title goes in twice - presumably to give it more
      // weight than the rest of the text; confirm.
      text.Append(item.Title).Append(" ");
      text.Append(item.Title).Append(" ");
      text.Append(item.Summary).Append(" ");
      text.Append(item.Content);

      if (comments != null)
      {
          foreach (var comment in comments)
          {
              text.Append(" ").Append(comment.Text);
          }
      }

      return new HtmlTextReader(new StringReader(text.ToString()));
  }
  /// <summary>
  /// Extracts the text of a syndication content element.
  /// </summary>
  /// <param name="content">Content to read; may be null.</param>
  /// <returns>
  /// The Text of a TextSyndicationContent; string.Empty for null or any other
  /// kind of content.
  /// </returns>
  public static string Content(SyndicationContent content)
  {
      var textContent = content as TextSyndicationContent;
      return textContent != null ? textContent.Text : string.Empty;
  }
  /// <summary>
  /// Rebuilds the analytics master of a riff: creates a document for every
  /// item, updates one classifier per distinct tag, and flushes the new master.
  /// </summary>
  /// <remarks>
  /// The Update action on the riff is guarded first. Items, comments and tags
  /// are read before the riff's file lock is taken; loading the old master,
  /// building the new one and flushing it happen while the lock is held.
  /// </remarks>
  /// <param name="riffName">Name of the riff to rebuild.</param>
  public void UpdateRiff(string riffName)
  {
      SecurityGuard.GuardAction(securable.From(riffName), SecurableActions.Update);
      var riff = riffDirectoryService.ReadSocial(RiffFilters.ByName(riffName));

      // Get the items of this riff
      var itemsResult = GetRiffItems(riffName);
      var commentsByItemId = itemsResult.DataContext.GetTable<Comment>()
          .Where(c => itemsResult.Items.Any(i => i.Id == c.ItemId))
          .ToList()
          .GroupBy(c => c.ItemId)
          .ToDictionary(g => g.Key, g => g.ToList());

      // Get tags of those items
      var tagsForItem = taggingModule
          .GetTagsForItems(itemsResult.DataContext, itemsResult.Items)
          .ToDictionary(g => g.Key, g => g.ToList());

      using (var fileLock = analyticsFactory.GetFileLockFor(riff.Id))
      {
          // Load the old master for this riff
          IAnalyticsMaster oldMaster = analyticsFactory.Load(riff.Id);

          // Create a new master for this riff
          IAnalyticsMaster newMaster = analyticsFactory.Create(riff.Id);

          // Create documents for each of them
          // (count word frequencies, build tables, etc.)
          foreach (var item in itemsResult.Items)
          {
              // Stays null for items without comments; GetHtmlTextReader accepts null.
              List<Comment> comments;
              commentsByItemId.TryGetValue(item.Id, out comments);

              var reader = GetHtmlTextReader(item, comments);
              try
              {
                  newMaster.DocumentSpace.AddDocument(ToDocumentId(item.Id), reader);
              }
              finally
              {
                  // Release the reader even when AddDocument throws.
                  reader.Dispose();
              }
          }

          // Update classifiers
          foreach (var tag in taggingModule.GetTagsForSocial(riffName).Distinct(new TagOwnerComparer()))
          {
              UpdateClassifier(tag.Tag, tagsForItem, oldMaster, newMaster);
          }

          newMaster.Flush();
      }
  }
  /// <summary>
  /// Compares TagOwner entries by their Tag value only, so that Distinct()
  /// yields one entry per tag. Null entries and null tags are tolerated.
  /// </summary>
  class TagOwnerComparer : IEqualityComparer<TagOwner>
  {
      /// <summary>
      /// Two entries are equal when both are null or when their Tag values are equal.
      /// </summary>
      public bool Equals(TagOwner x, TagOwner y)
      {
          bool xMissing = object.ReferenceEquals(x, null);
          bool yMissing = object.ReferenceEquals(y, null);
          if (xMissing || yMissing)
          {
              return xMissing && yMissing;
          }
          return x.Tag == y.Tag;
      }

      /// <summary>
      /// Hash of the Tag value; 0 for a null entry or a null Tag, which keeps the
      /// hash consistent with Equals.
      /// </summary>
      public int GetHashCode(TagOwner obj)
      {
          if (object.ReferenceEquals(obj, null) || object.ReferenceEquals(obj.Tag, null))
          {
              return 0;
          }
          return obj.Tag.GetHashCode();
      }
  }
  /// <summary>
  /// Loads the items with the given ids from the syndication repository.
  /// </summary>
  /// <param name="similarItemsIds">
  /// Ids to load. GetConflictingItems and GetSimilarItems pass them ordered by
  /// decreasing proximity.
  /// </param>
  /// <returns>
  /// The items that were found, in the order of <paramref name="similarItemsIds"/>;
  /// an empty list when no ids are given.
  /// </returns>
  private List<Item> GetItems(List<Guid> similarItemsIds)
  {
      if (similarItemsIds.Count == 0)
      {
          return new List<Item>();
      }

      using (var riffRepo = syndicationRepository.Create())
      {
          var found = riffRepo.ReadItems()
                              .Where(x => similarItemsIds.Contains(x.Id))
                              .ToList();

          // The query gives no ordering guarantee, so restore the ranking the
          // caller established. The id list is short (callers take at most 4).
          return found.OrderBy(x => similarItemsIds.IndexOf(x.Id)).ToList();
      }
  }
  383. }
  384. }