/Modules/Analytics/Source/Analytics.Web/Module/AnalyticsModule.cs
C# | 453 lines | 357 code | 69 blank | 27 comment | 42 complexity | d1fe85ee5889bbf9ffaf71de0ab121ee MD5 | raw file
Possible License(s): LGPL-3.0
- using System;
- using System.Data;
- using System.Configuration;
- using System.Linq;
- using System.Web;
- using System.Web.Security;
- using System.Web.UI;
- using System.Web.UI.HtmlControls;
- using System.Web.UI.WebControls;
- using System.Web.UI.WebControls.WebParts;
- using System.Xml.Linq;
- using System.Collections.Generic;
- using Analytics.Web.Models;
- using SocializeIt;
- using Tagging;
- using System.Data.Linq;
- using System.IO;
- using System.ServiceModel.Syndication;
- using System.Transactions;
- using Tagging.Web.Data;
- using System.Linq.Expressions;
- using SocializeIt.Services;
- using SocializeIt.Data;
- using System.Text;
- using SocializeIt.Comments;
- using log4net;
- using Analytics.SVM;
- using Analytics.Filesystem;
- using SocializeIt.Security;
-
- namespace Analytics.Web.Module
- {
- public class AnalyticsModule
- {
// Collaborators handed in through the constructor. They are assigned exactly
// once, so they are declared readonly to make accidental reassignment a
// compile-time error.
private readonly IRepositoryFactory<IRiffRepository> syndicationRepository;
private readonly IRiffDirectoryService riffDirectoryService;
private readonly ITaggingModule taggingModule;
private readonly IRepositoryFactory<ICommentRepository> commentRepository;
private readonly IAnalyticsFactory analyticsFactory;
private readonly ISecurableFactory securable;

private static readonly ILog log = LogManager.GetLogger(typeof(AnalyticsModule));

/// <summary>
/// Creates the module and stores every collaborator for later use.
/// No work is performed here beyond the assignments.
/// </summary>
public AnalyticsModule(
    IRepositoryFactory<IRiffRepository> syndicationRepository,
    IRiffDirectoryService riffDirectoryService,
    ITaggingModule taggingModule,
    IRepositoryFactory<ICommentRepository> commentRepository,
    IAnalyticsFactory analyticsFactory,
    ISecurableFactory securable)
{
    this.syndicationRepository = syndicationRepository;
    this.riffDirectoryService = riffDirectoryService;
    this.taggingModule = taggingModule;
    this.commentRepository = commentRepository;
    this.analyticsFactory = analyticsFactory;
    this.securable = securable;
}
-
/// <summary>
/// Suggests tags for <paramref name="item"/>.
/// </summary>
/// <param name="item">The item to classify; its riff is taken from <c>item.Social</c>.</param>
/// <returns>
/// At most five suggestions, in decreasing order of confidence. Tags the item
/// already has, tags rejected for this item through negative feedback, and
/// tags whose perceptron yields a negative confidence are left out.
/// </returns>
public IEnumerable<TagSuggestion> GetSuggestions(Item item)
{
    const int MaxSuggestions = 5;

    // Single source of truth for the document id (same value as item.Id.ToString()).
    string documentId = ToDocumentId(item.Id);
    var suggestions = new List<TagSuggestion>();

    using (var repository = commentRepository.Create())
    {
        var tags = (from t in taggingModule.GetAllTags(repository as DataContext)
                    where t.ItemTags.Any(x => x.ItemId == item.Id)
                    select t).ToList();

        // Names of the tags the item already carries; these are never suggested.
        var existingTagNames = new HashSet<string>(tags.Select(t => t.Name));

        // Load the analytics master of the item's riff, then walk its perceptrons.
        // The file lock is held for as long as the master is being used.
        using (var fileLock = analyticsFactory.GetFileLockFor(item.Social.Id))
        {
            var master = analyticsFactory.Load(item.Social.Id);
            DocumentSpace space = master.DocumentSpace;

            Document doc = space.GetDocument(documentId);
            if (doc == null)
            {
                // Doc not found, add it just for the suggestion process.
                var comments = repository.ReadCommentsByItem(item.Id).ToList();

                // 'using' guarantees the reader is released even if AddDocument throws.
                using (var reader = GetHtmlTextReader(item, comments))
                {
                    doc = space.AddDocument(documentId, reader);
                }
            }

            // Tags users explicitly rejected for this very document.
            var rejectedTagNames = new HashSet<string>(
                master.NegativeFeedback
                      .Where(f => f.DocumentId == documentId)
                      .Select(f => f.TagName));

            foreach (var perceptronEntry in master.Perceptrons)
            {
                string tagName = perceptronEntry.Key;

                if (rejectedTagNames.Contains(tagName) || existingTagNames.Contains(tagName))
                {
                    continue;
                }

                var confidence = perceptronEntry.Value.Test(doc);

                // Skip suggestions with negative confidence.
                if (confidence < 0)
                {
                    continue;
                }

                suggestions.Add(new TagSuggestion
                {
                    TagName = tagName,
                    Confidence = confidence // TODO scale?
                });
            }
        }
    }

    return suggestions.OrderByDescending(t => t.Confidence)
                      .Take(MaxSuggestions)
                      .ToList();
}
-
/// <summary>
/// Finds the training documents of one classifier that are closest to the
/// given item.
/// </summary>
/// <param name="classifierId">Name passed to <c>GetPerceptron</c> on the riff's analytics master.</param>
/// <param name="itemId">Item whose riff and document are looked up.</param>
/// <returns>
/// Up to four items, or an empty list when the classifier or the item's
/// document cannot be found.
/// </returns>
public List<Item> GetConflictingItems(string classifierId, Guid itemId)
{
    var riff = riffDirectoryService.ReadSocial(RiffFilters.WithItem(itemId));

    // NOTE(review): other methods in this class take the riff's file lock
    // around Load; this one does not. Confirm whether that is intentional.
    IAnalyticsMaster master = analyticsFactory.Load(riff.Id);
    DocumentSpace space = master.DocumentSpace;

    // GetPerceptron may yield null for an unknown classifier (other code in
    // this class checks for that), so bail out instead of dereferencing it.
    Perceptron perceptron = master.GetPerceptron(classifierId);
    if (perceptron == null)
    {
        return new List<Item>();
    }

    var thisDoc = space.GetDocument(ToDocumentId(itemId));
    if (thisDoc == null)
    {
        return new List<Item>();
    }

    //TODO would be better to know if thisDoc is in negatives or positives.
    // and return the others.
    // Evaluate the perceptron once and branch on the stored result.
    var score = perceptron.Test(thisDoc);

    IEnumerable<Document> docs;
    if (score == 0)
    {
        docs = perceptron.NegativeTrainingSet.Concat(perceptron.PositiveTrainingSet);
    }
    else if (score < 0)
    {
        docs = perceptron.NegativeTrainingSet;
    }
    else
    {
        docs = perceptron.PositiveTrainingSet;
    }

    // Take the 4 most similar items that are not thisDoc.
    var similarItemsIds = docs.Where(doc => doc.Id != thisDoc.Id)
                              .OrderByDescending(doc => doc.Proximity(thisDoc))
                              .Take(4)
                              .Select(doc => new Guid(doc.Id))
                              .ToList();

    return GetItems(similarItemsIds);
}
-
/// <summary>
/// Finds the items of the same riff whose documents are closest to the
/// document of <paramref name="itemId"/>.
/// </summary>
/// <param name="itemId">Item whose riff and document are looked up.</param>
/// <returns>
/// Up to four items, or an empty list when the item has no document in the
/// riff's document space.
/// </returns>
public List<Item> GetSimilarItems(Guid itemId)
{
    var riff = riffDirectoryService.ReadSocial(RiffFilters.WithItem(itemId));
    IAnalyticsMaster master = analyticsFactory.Load(riff.Id);
    DocumentSpace space = master.DocumentSpace;

    var thisDoc = space.GetDocument(ToDocumentId(itemId));
    if (thisDoc == null)
    {
        return new List<Item>();
    }

    // Run the riff search only once we know there is a document to compare against.
    // The ids are materialised first so the document lookups below run in memory.
    var riffItemIds = GetRiffItems(riff.SocialName).Items.Select(x => x.Id).ToList();

    // Take the 4 most similar items that are not thisDoc.
    var similarItemsIds = riffItemIds
        .Select(id => space.GetDocument(ToDocumentId(id)))
        .Where(doc => doc != null && doc.Id != thisDoc.Id)
        .OrderByDescending(doc => doc.Proximity(thisDoc))
        .Take(4)
        .Select(doc => new Guid(doc.Id))
        .ToList();

    return GetItems(similarItemsIds);
}
-
/// <summary>
/// Records positive feedback for a suggested tag. For now this just means
/// adding the tag to the item.
/// </summary>
/// <param name="tagName">Tag to add.</param>
/// <param name="itemId">Item that receives the tag.</param>
public void PositiveFeedback(string tagName, Guid itemId)
{
    var owningRiff = riffDirectoryService.ReadSocial(RiffFilters.WithItem(itemId));
    var guardedSuggestion = securable.TagSuggestion(owningRiff, itemId);
    SecurityGuard.GuardAction(guardedSuggestion, Constants.Feedback);

    // NOTE(review): the repository created here is not passed to AddTag; the
    // scope is kept because removing it might change behaviour - confirm.
    using (var repository = commentRepository.Create())
    {
        taggingModule.AddTag(itemId, tagName);
    }
}
-
/// <summary>
/// Records that <paramref name="tagName"/> was rejected as a suggestion for
/// the given item. The report is made while holding the riff's file lock.
/// </summary>
/// <param name="tagName">Tag that was rejected.</param>
/// <param name="itemId">Item the rejection applies to.</param>
public void NegativeFeedback(string tagName, Guid itemId)
{
    var owningRiff = riffDirectoryService.ReadSocial(RiffFilters.WithItem(itemId));
    var guardedSuggestion = securable.TagSuggestion(owningRiff, itemId);
    SecurityGuard.GuardAction(guardedSuggestion, Constants.Feedback);

    using (var riffLock = analyticsFactory.GetFileLockFor(owningRiff.Id))
    {
        var rejection = new NegativeFeedback
        {
            DocumentId = itemId.ToString(),
            TagName = tagName,
        };

        analyticsFactory.ReportNegativeFeedback(owningRiff.Id, rejection);
    }
}
-
/// <summary>
/// Creates the perceptron for <paramref name="tagName"/> in
/// <paramref name="newMaster"/>, brings its training sets in line with the
/// current tag evidence, and trains it.
/// </summary>
/// <param name="tagName">Tag the classifier is built for.</param>
/// <param name="tagsForItem">Tags per item id, used to split documents into positives and negatives.</param>
/// <param name="oldMaster">Previous master; source of earlier negative evidence.</param>
/// <param name="newMaster">Master that receives the perceptron.</param>
private void UpdateClassifier(string tagName, Dictionary<Guid, List<TagOwner>> tagsForItem,
    IAnalyticsMaster oldMaster, IAnalyticsMaster newMaster)
{
    DocumentSpace space = newMaster.DocumentSpace;
    var positives = new List<Document>();
    var negatives = new List<Document>();

    GetTagEvidence(tagName, tagsForItem, space, ref positives, ref negatives, oldMaster);

    Perceptron classifier = newMaster.CreatePerceptron(tagName);

    // Except() is lazy. Snapshot all four differences before touching the
    // classifier, so that none of them is enumerated over a training set that
    // an Add*/Remove* call is mutating at that moment.
    var newPositives = positives.Except(classifier.PositiveTrainingSet).ToList();
    var newNegatives = negatives.Except(classifier.NegativeTrainingSet).ToList();
    var positivesToRemove = classifier.PositiveTrainingSet.Except(positives).ToList();
    var negativesToRemove = classifier.NegativeTrainingSet.Except(negatives).ToList();

    classifier.AddPositives(newPositives);
    classifier.AddNegatives(newNegatives);
    classifier.RemovePositives(positivesToRemove);
    classifier.RemoveNegatives(negativesToRemove);

    classifier.Train(positives, negatives);
}
-
/// <summary>
/// Splits the documents of <paramref name="space"/> into positive and
/// negative evidence for <paramref name="tagName"/> and appends them to the
/// supplied lists.
/// </summary>
/// <param name="tagName">Tag the evidence is collected for.</param>
/// <param name="tagsForItem">Tags per item id; an item with the tag is positive, otherwise negative.</param>
/// <param name="space">Document space the documents are fetched from.</param>
/// <param name="positives">Receives the documents that carry the tag.</param>
/// <param name="negatives">
/// Receives the documents that do not carry the tag, plus documents named by
/// negative feedback for the tag and the old perceptron's negative training
/// documents that are not positive now.
/// </param>
/// <param name="oldMaster">Previous master; supplies negative feedback and the old perceptron.</param>
/// <remarks>
/// Ids for which <paramref name="space"/> has no document are skipped, so
/// neither list ever receives a null entry.
/// </remarks>
private void GetTagEvidence(string tagName, Dictionary<Guid, List<TagOwner>> tagsForItem,
    DocumentSpace space, ref List<Document> positives, ref List<Document> negatives,
    IAnalyticsMaster oldMaster)
{
    // Ids already present in each list, for constant-time membership checks.
    var positiveIds = new HashSet<string>();
    foreach (var existing in positives)
    {
        if (existing != null)
        {
            positiveIds.Add(existing.Id);
        }
    }

    var negativeIds = new HashSet<string>();
    foreach (var existing in negatives)
    {
        if (existing != null)
        {
            negativeIds.Add(existing.Id);
        }
    }

    foreach (var entry in tagsForItem)
    {
        var doc = space.GetDocument(ToDocumentId(entry.Key));
        if (doc == null)
        {
            // No document for this item in the space: it cannot serve as evidence.
            continue;
        }

        if (entry.Value.Any(t => t.Tag == tagName))
        {
            // The document has the tag, so it's positive.
            positives.Add(doc);
            positiveIds.Add(doc.Id);
        }
        else
        {
            // The document doesn't have the tag, so it's negative.
            negatives.Add(doc);
            negativeIds.Add(doc.Id);
        }
    }

    // Add the negative feedback previously provided by the users,
    // unless that document is already among the negatives.
    // NOTE(review): as in the original, this does not check whether the
    // document is currently a positive - confirm that is intended.
    IEnumerable<NegativeFeedback> negativeFeedback = oldMaster.NegativeFeedback;
    foreach (var feedback in negativeFeedback.Where(f => f.TagName == tagName))
    {
        if (negativeIds.Contains(feedback.DocumentId))
        {
            continue;
        }

        var doc = space.GetDocument(feedback.DocumentId);
        if (doc == null)
        {
            // The feedback names a document that is not in this space.
            continue;
        }

        negatives.Add(doc);
        negativeIds.Add(doc.Id);
    }

    // Also carry over the old perceptron's negatives for this tag, but only
    // those that are not positives now and are not already listed.
    var oldPerceptron = oldMaster.GetPerceptron(tagName);
    if (oldPerceptron != null)
    {
        foreach (var oldNegativeDoc in oldPerceptron.NegativeTrainingSet)
        {
            if (positiveIds.Contains(oldNegativeDoc.Id) || negativeIds.Contains(oldNegativeDoc.Id))
            {
                continue;
            }

            // Must add the document from the new space, not the old instance.
            var doc = space.GetDocument(oldNegativeDoc.Id);
            if (doc == null)
            {
                continue;
            }

            negatives.Add(doc);
            negativeIds.Add(doc.Id);
        }
    }
}
-
/// <summary>
/// Converts an item id into the string id used for its document.
/// </summary>
/// <param name="itemId">Item identifier.</param>
/// <returns>The id in the default hyphenated ("D") format.</returns>
private string ToDocumentId(Guid itemId)
{
    // "D" is the format Guid.ToString() uses when no format is given.
    string documentId = itemId.ToString("D");
    return documentId;
}
-
/// <summary>
/// Builds a classifier id from a riff name and a tag name.
/// </summary>
/// <param name="riffName">Name of the riff.</param>
/// <param name="tagName">Name of the tag.</param>
/// <returns>The two names joined by a single "+".</returns>
private static string TagClassifierId(string riffName, string tagName)
{
    return string.Concat(riffName, "+", tagName);
}
-
/// <summary>
/// Searches the items of the riff called <paramref name="riffName"/> through
/// the current social system.
/// </summary>
/// <param name="riffName">Name of the riff to search.</param>
/// <returns>The search result produced by <c>SearchItems</c>.</returns>
private SearchResult GetRiffItems(string riffName)
{
    //TODO change this, ShowHide module may hide items
    var socialSystem = SocialSystem.Current;

    // The render context is built from a fixed URI together with the riff name.
    var contextUri = new Uri("http://host/space");
    var renderContext = RenderContext.CreateContext(contextUri, riffName);

    return socialSystem.SearchItems(renderContext);
}
-
/// <summary>
/// Returns every classifier of the riff called <paramref name="riffName"/>.
/// </summary>
/// <param name="riffName">Name of the riff.</param>
/// <returns>One classifier per perceptron of the riff, with no tag filtering.</returns>
public IEnumerable<Classifier> GetClassifiers(string riffName)
{
    // A null filter tells the filtering overload to keep every perceptron.
    Expression<Func<Tag, bool>> noFilter = null;
    return GetClassifiers(riffName, noFilter);
}
-
/// <summary>
/// Returns the classifier of one tag within a riff.
/// </summary>
/// <param name="riffName">Name of the riff.</param>
/// <param name="tagName">Name of the tag.</param>
/// <returns>
/// The single matching classifier, or the default value when there is none.
/// <c>SingleOrDefault</c> throws if more than one classifier matches.
/// </returns>
public Classifier GetClassifier(string riffName, string tagName)
{
    Expression<Func<Tag, bool>> byTagName = tag => tag.Name == tagName;
    var matches = GetClassifiers(riffName, byTagName);
    return matches.SingleOrDefault();
}
-
/// <summary>
/// Builds a classifier for each perceptron of a riff, optionally restricted
/// to the tags selected by <paramref name="tagsFilter"/>.
/// </summary>
/// <param name="riffName">Name of the riff whose analytics master is loaded.</param>
/// <param name="tagsFilter">
/// Filter applied to the tag table; only perceptrons whose key equals the
/// name of a matching tag are kept. Pass null to keep every perceptron.
/// </param>
/// <returns>A materialised list of classifiers.</returns>
private IEnumerable<Classifier> GetClassifiers(string riffName, Expression<Func<Tag,bool>> tagsFilter)
{
    var riff = riffDirectoryService.ReadSocial(RiffFilters.ByName(riffName));

    using (var repo = commentRepository.Create())
    {
        var tags = (repo as DataContext).GetTable<Tag>().AsQueryable();

        IAnalyticsMaster master = analyticsFactory.Load(riff.Id);
        IEnumerable<KeyValuePair<string, Perceptron>> perceptrons = master.Perceptrons;

        if (tagsFilter != null)
        {
            // Fetch the matching tag names with a single query instead of
            // querying the tag table once per perceptron.
            // NOTE(review): names are now compared in memory (ordinal, case
            // sensitive); previously the comparison was done by the query
            // provider. Confirm tag names and perceptron keys share casing.
            var matchingTagNames = new HashSet<string>(
                tags.Where(tagsFilter).Select(t => t.Name));

            perceptrons = perceptrons.Where(p => matchingTagNames.Contains(p.Key));
        }

        return perceptrons.Select(x =>
            new Classifier
            {
                TagName = x.Key,
                Perceptron = x.Value,
            }).ToList();
    }
}
-
/// <summary>
/// Concatenates the text of an item and its comments, separated by single
/// spaces, and wraps the result in an <c>HtmlTextReader</c>.
/// </summary>
/// <param name="item">Item providing title, summary and content.</param>
/// <param name="comments">Comments whose text is appended; may be null.</param>
/// <returns>A new reader over the combined text; the caller disposes it.</returns>
private HtmlTextReader GetHtmlTextReader(Item item, List<Comment> comments)
{
    // The title is deliberately appended twice, as before.
    // NOTE(review): presumably to give the title extra weight - confirm.
    var text = new StringBuilder()
        .Append(item.Title).Append(" ")
        .Append(item.Title).Append(" ")
        .Append(item.Summary).Append(" ")
        .Append(item.Content);

    foreach (var comment in comments ?? new List<Comment>())
    {
        text.Append(" ").Append(comment.Text);
    }

    return new HtmlTextReader(new StringReader(text.ToString()));
}
-
/// <summary>
/// Extracts the text of a syndication content element.
/// </summary>
/// <param name="content">Content to read; may be null.</param>
/// <returns>
/// The <c>Text</c> of a <see cref="TextSyndicationContent"/>; an empty string
/// for any other kind of content, including null.
/// </returns>
public static string Content(SyndicationContent content)
{
    var textContent = content as TextSyndicationContent;
    if (textContent == null)
    {
        return string.Empty;
    }

    return textContent.Text;
}
-
/// <summary>
/// Rebuilds the analytics master of a riff: creates a document for every
/// item, updates one classifier per distinct tag, and flushes the new master.
/// </summary>
/// <param name="riffName">Name of the riff to rebuild.</param>
/// <remarks>
/// The caller must be allowed to perform <c>SecurableActions.Update</c> on
/// the riff. The rebuild runs while holding the riff's file lock.
/// </remarks>
public void UpdateRiff(string riffName)
{
    SecurityGuard.GuardAction(securable.From(riffName), SecurableActions.Update);

    var riff = riffDirectoryService.ReadSocial(RiffFilters.ByName(riffName));

    // Get the items of this riff.
    var itemsResult = GetRiffItems(riffName);

    // Comments of those items, grouped by the item they belong to.
    var commentsByItemId = itemsResult.DataContext.GetTable<Comment>()
        .Where(c => itemsResult.Items.Any(i => i.Id == c.ItemId))
        .ToList()
        .GroupBy(c => c.ItemId)
        .ToDictionary(g => g.Key, g => g.ToList());

    // Get tags of those items.
    var tagsForItem = taggingModule
        .GetTagsForItems(itemsResult.DataContext, itemsResult.Items)
        .ToDictionary(g => g.Key, g => g.ToList());

    using (var fileLock = analyticsFactory.GetFileLockFor(riff.Id))
    {
        // Load the old master for this riff.
        IAnalyticsMaster oldMaster = analyticsFactory.Load(riff.Id);

        // Create a new master for this riff.
        IAnalyticsMaster newMaster = analyticsFactory.Create(riff.Id);

        // Create documents for each of them
        // (count word frequencies, build tables, etc.)
        foreach (var item in itemsResult.Items)
        {
            // Stays null when the item has no comments; GetHtmlTextReader accepts null.
            List<Comment> comments;
            commentsByItemId.TryGetValue(item.Id, out comments);

            // 'using' guarantees the reader is released even if AddDocument throws.
            using (var reader = GetHtmlTextReader(item, comments))
            {
                newMaster.DocumentSpace.AddDocument(ToDocumentId(item.Id), reader);
            }
        }

        // Update classifiers, one per distinct tag name.
        foreach (var tag in taggingModule.GetTagsForSocial(riffName).Distinct(new TagOwnerComparer()))
        {
            UpdateClassifier(tag.Tag, tagsForItem, oldMaster, newMaster);
        }

        newMaster.Flush();
    }
}
-
/// <summary>
/// Treats two <c>TagOwner</c> values as equal when their <c>Tag</c> is equal.
/// Null owners and null tags are tolerated instead of throwing.
/// </summary>
private sealed class TagOwnerComparer : IEqualityComparer<TagOwner>
{
    /// <summary>
    /// Returns true when both arguments are the same instance (or both null),
    /// or when neither is null and their tags are equal.
    /// </summary>
    public bool Equals(TagOwner x, TagOwner y)
    {
        if (object.ReferenceEquals(x, y))
        {
            return true;
        }

        if (object.ReferenceEquals(x, null) || object.ReferenceEquals(y, null))
        {
            return false;
        }

        return x.Tag == y.Tag;
    }

    /// <summary>
    /// Hash of the tag; 0 for a null owner or a null tag, so that values
    /// considered equal by <see cref="Equals(TagOwner, TagOwner)"/> share a hash.
    /// </summary>
    public int GetHashCode(TagOwner obj)
    {
        if (object.ReferenceEquals(obj, null) || obj.Tag == null)
        {
            return 0;
        }

        return obj.Tag.GetHashCode();
    }
}
-
/// <summary>
/// Loads the items with the given ids.
/// </summary>
/// <param name="similarItemsIds">Ids to load, in the order the caller ranked them.</param>
/// <returns>
/// The items that were found, in the same order as
/// <paramref name="similarItemsIds"/>; an empty list when no ids are given.
/// </returns>
private List<Item> GetItems(List<Guid> similarItemsIds)
{
    if (similarItemsIds == null || similarItemsIds.Count == 0)
    {
        return new List<Item>();
    }

    using (var riffRepo = syndicationRepository.Create())
    {
        var items = riffRepo.ReadItems()
                            .Where(x => similarItemsIds.Contains(x.Id))
                            .ToList();

        // The query above specifies no ordering, so restore the caller's
        // ranking (callers sort the ids by proximity before calling).
        return items.OrderBy(x => similarItemsIds.IndexOf(x.Id)).ToList();
    }
}
- }
- }