PageRenderTime 22ms CodeModel.GetById 2ms app.highlight 15ms RepoModel.GetById 1ms app.codeStats 0ms

/Modules/Analytics/Source/Analytics.Web/Module/AnalyticsModule.cs

https://bitbucket.org/instedd/riff
C# | 453 lines | 357 code | 69 blank | 27 comment | 42 complexity | d1fe85ee5889bbf9ffaf71de0ab121ee MD5 | raw file
  1using System;
  2using System.Data;
  3using System.Configuration;
  4using System.Linq;
  5using System.Web;
  6using System.Web.Security;
  7using System.Web.UI;
  8using System.Web.UI.HtmlControls;
  9using System.Web.UI.WebControls;
 10using System.Web.UI.WebControls.WebParts;
 11using System.Xml.Linq;
 12using System.Collections.Generic;
 13using Analytics.Web.Models;
 14using SocializeIt;
 15using Tagging;
 16using System.Data.Linq;
 17using System.IO;
 18using System.ServiceModel.Syndication;
 19using System.Transactions;
 20using Tagging.Web.Data;
 21using System.Linq.Expressions;
 22using SocializeIt.Services;
 23using SocializeIt.Data;
 24using System.Text;
 25using SocializeIt.Comments;
 26using log4net;
 27using Analytics.SVM;
 28using Analytics.Filesystem;
 29using SocializeIt.Security;
 30
 31namespace Analytics.Web.Module
 32{
 33	public class AnalyticsModule
 34	{
 35		IRepositoryFactory<IRiffRepository> syndicationRepository;
 36		IRiffDirectoryService riffDirectoryService;
 37		ITaggingModule taggingModule;
 38		IRepositoryFactory<ICommentRepository> commentRepository;
 39        IAnalyticsFactory analyticsFactory;
 40		ISecurableFactory securable;
 41
 42		private static ILog log = LogManager.GetLogger(typeof(AnalyticsModule));
 43
 44		public AnalyticsModule(
 45			IRepositoryFactory<IRiffRepository> syndicationRepository,
 46			IRiffDirectoryService riffDirectoryService,
 47			ITaggingModule taggingModule,
 48			IRepositoryFactory<ICommentRepository> commentRepository,
 49            IAnalyticsFactory analyticsFactory,
 50			ISecurableFactory securable)
 51		{
 52			this.syndicationRepository = syndicationRepository;
 53			this.riffDirectoryService = riffDirectoryService;
 54			this.taggingModule = taggingModule;
 55			this.commentRepository = commentRepository;
 56            this.analyticsFactory = analyticsFactory;
 57			this.securable = securable;
 58		}
 59
 60		// Returns Suggestions in decreasing order of confidence
 61		public IEnumerable<TagSuggestion> GetSuggestions(Item item)
 62		{
 63            // Leer sancocho para ese riff y despuļæ½s recorrer los perceptrons
 64			var result = new List<TagSuggestion>();
 65            using (var repository = commentRepository.Create())
 66            {
 67                var tags = (from t in taggingModule.GetAllTags(repository as DataContext)
 68                            where t.ItemTags.Any(x => x.ItemId == item.Id)
 69                            select t).ToList();
 70
 71                var itemId = item.Id.ToString();
 72
 73                using (var fileLock = analyticsFactory.GetFileLockFor(item.Social.Id))
 74                {
 75                    var master = analyticsFactory.Load(item.Social.Id);
 76                    DocumentSpace space = master.DocumentSpace;
 77
 78                    Document doc = space.GetDocument(item.Id.ToString());
 79
 80                    if (doc == null)
 81                    {
 82                        // Doc not found, add it just for the suggestion process
 83                        var comments = repository.ReadCommentsByItem(item.Id).ToList();
 84
 85                        var reader = GetHtmlTextReader(item, comments);
 86                        doc = space.AddDocument(ToDocumentId(item.Id), reader);
 87                        reader.Dispose();
 88                    }
 89
 90                    foreach (var perceptronEntry in master.Perceptrons)
 91                    {
 92                        // Skip if it's the negative feedback already
 93                        if (master.NegativeFeedback.Any(f => f.DocumentId == itemId && f.TagName == perceptronEntry.Key))
 94                        {
 95                            continue;
 96                        }
 97
 98                        if (!tags.Any(x => x.Name == perceptronEntry.Key))
 99                        {
100                            Perceptron perceptron = perceptronEntry.Value;
101                            var e = perceptron.Test(doc);
102
103                            // Skip suggestions with negative confidence
104                            if (e < 0)
105                                continue;
106
107                            result.Add(new TagSuggestion
108                            {
109                                TagName = perceptronEntry.Key,
110                                Confidence = e // TODO scale?
111                            });
112                        }
113                    }
114
115                    return result.OrderByDescending(t => t.Confidence)
116                            .Take(5).ToList();
117                }
118            }
119		}
120
121		public List<Item> GetConflictingItems(string classifierId, Guid itemId)
122		{
123			var riff = riffDirectoryService.ReadSocial(RiffFilters.WithItem(itemId));
124
125            IAnalyticsMaster master = analyticsFactory.Load(riff.Id);
126            DocumentSpace space = master.DocumentSpace;
127            Perceptron perceptron = master.GetPerceptron(classifierId);
128
129			var thisDoc = space.GetDocument(ToDocumentId(itemId));
130			if (thisDoc == null)
131				return new List<Item>();
132
133			IEnumerable<Document> docs;
134			//TODO would be better to know if  thisDoc is in negatives or positives.
135			//		and return the others.
136			if (perceptron.Test(thisDoc) == 0)
137				docs = perceptron.NegativeTrainingSet.Concat(perceptron.PositiveTrainingSet);
138			else if (perceptron.Test(thisDoc) < 0)
139				docs = perceptron.NegativeTrainingSet;
140			else
141				docs = perceptron.PositiveTrainingSet;
142
143			// take the 4 similar items that are not thisDoc.
144			var similarItemsIds = docs.Where(doc => doc.Id != thisDoc.Id)
145				.OrderByDescending(doc => doc.Proximity(thisDoc))
146				.Take(4)
147				.Select(doc => new Guid(doc.Id)).ToList();
148
149			return GetItems(similarItemsIds);
150		}
151
152		public List<Item> GetSimilarItems(Guid itemId)
153		{
154			var riff = riffDirectoryService.ReadSocial(RiffFilters.WithItem(itemId));
155            IAnalyticsMaster master = analyticsFactory.Load(riff.Id);
156            DocumentSpace space = master.DocumentSpace;
157
158			var items = GetRiffItems(riff.SocialName);
159
160			var thisDoc = space.GetDocument(ToDocumentId(itemId));
161			if (thisDoc == null)
162				return new List<Item>();
163
164			// take the 4 similar items that are not thisDoc.
165			var similarItemsIds = items.Items.Select(x => x.Id).ToList()
166				.Select(id => space.GetDocument(ToDocumentId(id)))
167				.Where(doc => doc != null && doc.Id != thisDoc.Id)
168				.OrderByDescending(doc => doc.Proximity(thisDoc))
169				.Take(4)
170				.Select(doc => new Guid(doc.Id)).ToList();
171
172			return GetItems(similarItemsIds);
173		}
174
175        // For positive feedback, just adding a tag is enough for now
176		public void PositiveFeedback(string tagName, Guid itemId)
177		{
178			var riff = riffDirectoryService.ReadSocial(RiffFilters.WithItem(itemId));
179			SecurityGuard.GuardAction(securable.TagSuggestion(riff, itemId), Constants.Feedback);
180
181            using (var repo = commentRepository.Create())
182            {   
183                taggingModule.AddTag(itemId, tagName);
184            }
185		}
186
187        public void NegativeFeedback(string tagName, Guid itemId)
188		{
189			var riff = riffDirectoryService.ReadSocial(RiffFilters.WithItem(itemId));
190			SecurityGuard.GuardAction(securable.TagSuggestion(riff, itemId), Constants.Feedback);
191
192            using (var fileLock = analyticsFactory.GetFileLockFor(riff.Id))
193            {
194                analyticsFactory.ReportNegativeFeedback(riff.Id, new NegativeFeedback
195                {
196                    DocumentId = itemId.ToString(),
197                    TagName = tagName,
198                });
199            }
200		}
201
202		private void UpdateClassifier(string tagName, Dictionary<Guid, List<TagOwner>> tagsForItem, 
203            IAnalyticsMaster oldMaster, IAnalyticsMaster newMaster)
204		{
205            DocumentSpace space = newMaster.DocumentSpace;
206			var positives = new List<Document>();
207			var negatives = new List<Document>();
208
209			GetTagEvidence(tagName, tagsForItem, space, ref positives, ref negatives, oldMaster);
210
211            Perceptron classifier = newMaster.CreatePerceptron(tagName);
212
213			var newPositives = positives.Except(classifier.PositiveTrainingSet);
214			var newNegatives = negatives.Except(classifier.NegativeTrainingSet);
215			var positivesToRemove = classifier.PositiveTrainingSet.Except(positives);
216			var negativesToRemove = classifier.NegativeTrainingSet.Except(negatives);
217
218			classifier.AddPositives(newPositives);
219			classifier.AddNegatives(newNegatives);
220			classifier.RemovePositives(positivesToRemove);
221			classifier.RemoveNegatives(negativesToRemove);
222
223			classifier.Train(positives, negatives);
224		}
225
226		private void GetTagEvidence(string tagName, Dictionary<Guid, List<TagOwner>> tagsForItem, 
227            DocumentSpace space, ref List<Document> positives, ref List<Document> negatives,
228            IAnalyticsMaster oldMaster)
229		{
230            IEnumerable<NegativeFeedback> negativeFeedback = oldMaster.NegativeFeedback;
231
232            foreach(var entry in tagsForItem)
233			{
234                Guid itemId = entry.Key;
235                List<TagOwner> tags = entry.Value;
236                var doc = space.GetDocument(itemId.ToString());
237                if (tags.Any(t => t.Tag == tagName))
238                {
239                    // The document has the tag, so it's positive
240                    positives.Add(doc);
241                }
242                else
243                {
244                    // The document doesn't have the tag, so it's negative
245                    negatives.Add(doc);
246                }
247			}
248
249            // Add the negative feedback previosuly provided by the users
250            foreach (var feedback in negativeFeedback.Where(f => f.TagName == tagName))
251            {
252                // Only if the negative feedback is not already present
253                if (!negatives.Select(n => n.Id).Contains(feedback.DocumentId))
254                {
255                    var doc = space.GetDocument(feedback.DocumentId);
256                    negatives.Add(doc);
257                }
258            }
259
260            // Also add previous negative feedback of this tagName, only if it's
261            // now not in the positives
262            var oldPerceptron = oldMaster.GetPerceptron(tagName);
263            if (oldPerceptron != null)
264            {
265                foreach (var oldNegativeDoc in oldPerceptron.NegativeTrainingSet)
266                {
267                    // Check if the doc is *not* in the positives now, and add
268                    // it to the negatives (if not already there)
269                    if (!positives.Select(p => p.Id).Contains(oldNegativeDoc.Id))
270                    {
271                        if (!negatives.Select(n => n.Id).Contains(oldNegativeDoc.Id))
272                        {
273                            // Must add the new doc, not the old one
274                            var doc = space.GetDocument(oldNegativeDoc.Id);
275                            negatives.Add(doc);
276                        }
277                    }
278                }
279            }
280		}
281
282		private string ToDocumentId(Guid itemId)
283		{
284			return itemId.ToString();
285		}
286
287		private static string TagClassifierId(string riffName, string tagName)
288		{
289			return riffName + "+" + tagName;
290		}
291
292		private SearchResult GetRiffItems(string riffName)
293		{
294			//TODO change this, ShowHide module may hide items
295			return SocialSystem.Current.SearchItems(
296						 RenderContext.CreateContext(new Uri("http://host/space"), riffName)
297					 );
298		}
299
300		public IEnumerable<Classifier> GetClassifiers(string riffName)
301		{
302			return GetClassifiers(riffName, null);
303		}
304
305		public Classifier GetClassifier(string riffName, string tagName)
306		{
307			return GetClassifiers(riffName, x => x.Name == tagName).SingleOrDefault();
308		}
309
310		private IEnumerable<Classifier> GetClassifiers(string riffName, Expression<Func<Tag,bool>> tagsFilter)
311		{
312            var riff = riffDirectoryService.ReadSocial(RiffFilters.ByName(riffName));
313
314			using (var repo = commentRepository.Create())
315			{
316				var tags = (repo as DataContext).GetTable<Tag>().AsQueryable();
317				if (tagsFilter != null)
318					tags = tags.Where(tagsFilter);
319
320                IAnalyticsMaster master = analyticsFactory.Load(riff.Id);
321                IEnumerable<KeyValuePair<string, Perceptron>> perceptrons;
322
323                perceptrons = master.Perceptrons;
324
325                if (tagsFilter != null)
326                {
327                    perceptrons = perceptrons.Where(p => tags.Select(t => t.Name).Contains(p.Key));
328                }
329
330                DocumentSpace space = master.DocumentSpace;
331
332				return perceptrons.Select(x =>
333					new Classifier
334					{
335						TagName = x.Key,
336                        Perceptron = x.Value,
337					}).ToList();
338			}
339		}
340
341		private HtmlTextReader GetHtmlTextReader(Item item, List<Comment> comments)
342		{
343			StringBuilder contentBuilder = new StringBuilder();
344			contentBuilder.Append(item.Title);
345			contentBuilder.Append(" ");
346			contentBuilder.Append(item.Title);
347			contentBuilder.Append(" ");
348			contentBuilder.Append(item.Summary);
349			contentBuilder.Append(" ");
350			contentBuilder.Append(item.Content);
351
352            if (comments != null)
353            {
354                foreach (var comment in comments)
355                {
356                    contentBuilder.Append(" ");
357                    contentBuilder.Append(comment.Text);
358                }
359            }
360			
361			var reader = new HtmlTextReader(new StringReader(contentBuilder.ToString()));
362			return reader;
363		}
364
365		public static string Content(SyndicationContent content)
366		{
367			if (content is TextSyndicationContent)
368			{
369				return (content as TextSyndicationContent).Text;
370			}
371			else
372			{
373				return string.Empty;
374			}
375		}
376
377		public void UpdateRiff(string riffName)
378		{
379			SecurityGuard.GuardAction(securable.From(riffName), SecurableActions.Update);
380
381            var riff = riffDirectoryService.ReadSocial(RiffFilters.ByName(riffName));
382
383            // Get the items of this riff
384            var itemsResult = GetRiffItems(riffName);
385
386            var commentsByItemId = itemsResult.DataContext.GetTable<Comment>()
387                    .Where(c => itemsResult.Items.Any(i => i.Id == c.ItemId))
388                    .ToList()
389                    .GroupBy(c => c.ItemId)
390                    .ToDictionary(g => g.Key, g => g.ToList());
391
392            // Get tags of those items
393            var tagsForItem = taggingModule
394                .GetTagsForItems(itemsResult.DataContext, itemsResult.Items)
395                .ToDictionary(g => g.Key, g => g.ToList());
396
397            using (var fileLock = analyticsFactory.GetFileLockFor(riff.Id))
398            {
399                // Load the old master for this riff
400                IAnalyticsMaster oldMaster = analyticsFactory.Load(riff.Id);
401
402                // Create a new master for this riff
403                IAnalyticsMaster newMaster = analyticsFactory.Create(riff.Id);
404
405                // Create documents for each of them
406                // (count word frequencies, build tables, etc.)
407                foreach (var item in itemsResult.Items)
408                {
409                    List<Comment> comments;
410                    commentsByItemId.TryGetValue(item.Id, out comments);
411
412                    var reader = GetHtmlTextReader(item, comments);
413                    var doc = newMaster.DocumentSpace.AddDocument(ToDocumentId(item.Id), reader);
414                    reader.Dispose();
415                }
416
417                // Update classifiers
418                foreach (var tag in taggingModule.GetTagsForSocial(riffName).Distinct(new TagOwnerComparer()))
419                {
420                    UpdateClassifier(tag.Tag, tagsForItem, oldMaster, newMaster);
421                }
422
423                newMaster.Flush();
424            }
425		}
426
427        class TagOwnerComparer : IEqualityComparer<TagOwner>
428        {
429            public bool Equals(TagOwner x, TagOwner y)
430            {
431                return x.Tag == y.Tag;
432            }
433
434            public int GetHashCode(TagOwner obj)
435            {
436                return obj.Tag.GetHashCode();
437            }
438        }
439
440		private List<Item> GetItems(List<Guid> similarItemsIds)
441		{
442			if (similarItemsIds.Count == 0)
443				return new List<Item>();
444			else
445			{
446				using (var riffRepo = syndicationRepository.Create())
447				{
448					return riffRepo.ReadItems().Where(x => similarItemsIds.Contains(x.Id)).ToList();
449				}
450			}
451		}
452	}
453}