// Project: csvlogtailer, file /CsvLogTailer/CsvLogTailer.cs
// Language: C# (514 lines)
// MD5 hash: 42265bc2fe5c056afd0d5641e4049487
// Repository: https://bitbucket.org/emertechie/csvlogtailer
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Reactive;
using System.Reactive.Disposables;
using System.Reactive.Linq;
using System.Reactive.Subjects;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using CsvLogTailing.Bookmarks;
using FParsec;

namespace CsvLogTailing
{
	public class CsvLogTailer
	{
		// How long the directory polling loop in GetDirectoryChanges waits between rescans of the watched directory.
		private readonly TimeSpan logDirectoryPollTimeSpan = TimeSpan.FromSeconds(30);
		// How long TailFile sleeps between checks for newly appended content in a tailed file.
		private readonly TimeSpan filePollTimeSpan = TimeSpan.FromSeconds(0.5);

		// Backing subject for the public Exceptions stream.
		private readonly Subject<Exception> exceptionsSubject;
		// Synchronized wrapper around exceptionsSubject (created in the constructor); this is what the class
		// publishes non-fatal errors to, because they can be raised from several threads at once.
		protected readonly ISubject<Exception, Exception> SyncedExceptionsSubject;

		// Held by ReadNext while parsing; also guards logsReadSinceLastGarbageCollect.
		private readonly object parsingLock = new object();
		// Number of records handed out by ReadNext since the last forced GC.Collect().
		private int logsReadSinceLastGarbageCollect = 0;
		// When set, ReadNext forces a garbage collection once the counter above reaches this value; null disables it.
		private int? forceMemoryCollectionThreshold;
		
		/// <summary>
		/// Creates a tailer and wires up the subjects used to report non-fatal errors.
		/// </summary>
		/// <param name="forceMemoryCollectionThreshold">
		/// Optional number of parsed records after which a garbage collection is forced; null (the default) never forces one.
		/// </param>
		public CsvLogTailer(int? forceMemoryCollectionThreshold = null)
		{
			var rawExceptions = new Subject<Exception>();
			exceptionsSubject = rawExceptions;

			// Errors may be published from several threads at the same time; the synchronized
			// wrapper serializes them so subscribers see one notification at a time.
			SyncedExceptionsSubject = Subject.Synchronize(rawExceptions);

			this.forceMemoryCollectionThreshold = forceMemoryCollectionThreshold;
		}

		/// <summary>
		/// Stream of non-fatal errors raised while tailing (for example parse failures, bookmark update
		/// failures and FileSystemWatcher errors). Tailing carries on after these are published.
		/// </summary>
		public IObservable<Exception> Exceptions
		{
			// AsObservable hides the underlying Subject so that consumers cannot downcast the result and
			// push, complete or dispose the stream themselves, bypassing the synchronized wrapper.
			get { return exceptionsSubject.AsObservable(); }
		}

		/// <summary>
		/// Tails the configured file or directory without persisting bookmarks.
		/// </summary>
		/// <param name="settings">What to tail and how to interpret it.</param>
		/// <returns>An observable of parsed log records.</returns>
		public IObservable<LogRecord> Tail(CsvLogTailerSettings settings)
		{
			// A do-nothing repository: no bookmark is ever returned or stored.
			ILogFileBookmarkRepository noBookmarks = new NullLogFileBookmarkRepository();
			return Tail(settings, noBookmarks);
		}

		/// <summary>
		/// Tails the configured file or directory and periodically records, per file, the date/time of a
		/// recently emitted record in <paramref name="logFileBookmarkRepository"/>.
		/// </summary>
		/// <param name="settings">What to tail and how to interpret it.</param>
		/// <param name="logFileBookmarkRepository">Store that is read for starting bookmarks and updated while tailing.</param>
		/// <returns>
		/// A cold observable: each subscription starts its own tailing and stops it again when disposed.
		/// </returns>
		/// <exception cref="ArgumentNullException">Either argument is null.</exception>
		public IObservable<LogRecord> Tail(CsvLogTailerSettings settings, ILogFileBookmarkRepository logFileBookmarkRepository)
		{
			if (settings == null) throw new ArgumentNullException("settings");
			if (logFileBookmarkRepository == null) throw new ArgumentNullException("logFileBookmarkRepository");

			// The path is classified once, up front; a path that does not exist is treated as a single file.
			bool isADirectory = Directory.Exists(settings.FileOrDirectoryPath);

			IObservable<LogRecord> logRecordsObs = isADirectory
				? GetAllFileChangesForDirectory(settings, logFileBookmarkRepository)
					.Merge()
				: GetFileChanges(
					settings.FileOrDirectoryPath,
					settings.Encoding,
					GetColumnsForFile(settings.FileOrDirectoryPath, settings),
					settings.DateTimeColumnIndex,
					logFileBookmarkRepository);

			return Observable.Create<LogRecord>(observer =>
			{
				// Publish so the caller and the bookmark updater share one underlying subscription.
				var sharedObservable = logRecordsObs.Publish();

				var recordsSubscription = sharedObservable.Subscribe(observer);

				// One sampled stream per file, flattened back into a single subscription. Flattening with
				// SelectMany (rather than subscribing to every group by hand) means that disposing
				// 'bookmarkSubscription' also tears down every per-file sampler instead of leaking them.
				var bookmarkSubscription = sharedObservable
					.GroupBy(x => x.FilePath)
					.SelectMany(group => group.SampleResponsive(settings.BookmarkRepositoryUpdateFrequency))
					.Subscribe(
						logRec =>
						{
							try
							{
								logFileBookmarkRepository.AddOrUpdate(new LogFileBookmark(logRec.FilePath, logRec.LogDateTime));
							}
							catch (Exception bookmarkException)
							{
								// A failed bookmark update must not stop tailing; report it and carry on.
								SyncedExceptionsSubject.OnNext(bookmarkException);
							}
						},
						error =>
						{
							// Deliberately empty: a terminal error of the shared source is delivered to the
							// caller through 'recordsSubscription'. Without this handler Rx would rethrow it
							// on the producing thread.
						});

				// Connect last so that both subscribers are attached before any record flows.
				return new CompositeDisposable(sharedObservable.Connect(), recordsSubscription, bookmarkSubscription);
			});
		}

		/// <summary>
		/// Builds an observable that tails a single file on a dedicated long-running task.
		/// </summary>
		/// <param name="filePath">File to tail; it does not have to exist yet.</param>
		/// <param name="encoding">Text encoding of the file.</param>
		/// <param name="possiblyNullColumnNames">Column names attached to each record, or null when unknown.</param>
		/// <param name="dateTimeColumnIndex">Zero-based index of the column holding the record's date/time.</param>
		/// <param name="logFileBookmarkRepository">Queried once, when this method is called, for the file's bookmark.</param>
		/// <exception cref="ArgumentOutOfRangeException">
		/// Column names are supplied and <paramref name="dateTimeColumnIndex"/> lies beyond them.
		/// </exception>
		private IObservable<LogRecord> GetFileChanges(
			string filePath,
			Encoding encoding,
			string[] possiblyNullColumnNames,
			int dateTimeColumnIndex,
			ILogFileBookmarkRepository logFileBookmarkRepository)
		{
			if (possiblyNullColumnNames != null && dateTimeColumnIndex >= possiblyNullColumnNames.Length)
				throw new ArgumentOutOfRangeException("dateTimeColumnIndex", "DateTime column index is greater than number of columns");

			var lastKnownPosition = logFileBookmarkRepository.Get(filePath);

			return Observable.Create<LogRecord>(observer =>
			{
				var cancellationTokenSource = new CancellationTokenSource();

				// Consecutive "file in use" failures; tailing gives up on the tenth one.
				int sharingExceptions = 0;

				Task fileWatcherTask = Task.Factory.StartNew(() =>
					{
						do
						{
							try
							{
								TailFile(filePath, encoding, possiblyNullColumnNames, dateTimeColumnIndex, observer, cancellationTokenSource, lastKnownPosition);

								sharingExceptions = 0;
							}
							catch (FileNotFoundException)
							{
								// Not there (yet, or any more): wait for it to appear, then start tailing again.
								WaitUntilFileCreated(filePath, cancellationTokenSource);
							}
							catch (IOException ioex)
							{
								// NOTE(review): matching on the message text only works with English framework
								// resources; checking the sharing-violation HRESULT would be more robust.
								if (ioex.Message.Contains("because it is being used by another process") && ++sharingExceptions < 10)
								{
									// Back off briefly, but wake up immediately if the subscription is disposed.
									cancellationTokenSource.Token.WaitHandle.WaitOne(250);
								}
								else
								{
									throw;
								}
							}
							catch (Exception ex)
							{
								observer.OnError(ex);
								throw;
							}
						}
						while (!cancellationTokenSource.IsCancellationRequested);
					},
					TaskCreationOptions.LongRunning);

				// Make sure any Task exception is observed
				fileWatcherTask.ContinueWith(
					t => observer.OnError(new Exception("Error while tailing file. See inner exception for more details", t.Exception)),
					TaskContinuationOptions.OnlyOnFaulted | TaskContinuationOptions.ExecuteSynchronously);

				return Disposable.Create(() =>
				{
					cancellationTokenSource.Cancel();

					// observer.OnError(...) above disposes this subscription from inside the tailing task
					// itself; waiting there would only block the task on its own completion until the timeout.
					if (Task.CurrentId == fileWatcherTask.Id)
						return;

					try
					{
						// Give the task a moment to notice the cancellation and close the file.
						fileWatcherTask.Wait(TimeSpan.FromSeconds(Debugger.IsAttached ? 120 : 2));
					}
					catch (AggregateException)
					{
						// Wait rethrows the failure of a faulted task. That failure has already been reported
						// through observer.OnError, and disposal must not throw.
					}
				});
			});
		}

		/// <summary>
		/// Asks the settings' column names provider, if there is one, for the column names of a file.
		/// </summary>
		/// <returns>Whatever the provider returns for <paramref name="filePath"/>, or null when no provider is configured.</returns>
		private static string[] GetColumnsForFile(string filePath, CsvLogTailerSettings settings)
		{
			var columnNamesProvider = settings.ColumnNamesProvider;
			if (columnNamesProvider == null)
				return null;

			return columnNamesProvider(filePath);
		}

		/// <summary>
		/// Watches a directory and emits one record stream per file that should be tailed. A file's stream
		/// is shut down when the directory watcher reports that tailing of that file should stop.
		/// </summary>
		/// <param name="settings">Supplies the directory, file filter, exclusion regex and parsing options.</param>
		/// <param name="logFileBookmarkRepository">Passed through to each per-file tailer.</param>
		private IObservable<IObservable<LogRecord>> GetAllFileChangesForDirectory(CsvLogTailerSettings settings, ILogFileBookmarkRepository logFileBookmarkRepository)
		{
			return Observable.Create<IObservable<LogRecord>>(observer =>
			{
				// Directory notifications and disposal of the whole subscription can happen on different
				// threads, so every access to the dictionary goes through this lock. The lock is never held
				// while subscribing or disposing, because disposing a file tailer can block for a while.
				var subscriptionsLock = new object();
				var fileTailerSubscriptions = new Dictionary<string, IDisposable>();

				var directoryChangesSubscription = GetDirectoryChanges(settings.FileOrDirectoryPath, settings.DirectoryFilter)
					.Subscribe(
						change =>
						{
							if (settings.FileNameExcludeRegex != null && settings.FileNameExcludeRegex.IsMatch(Path.GetFileName(change.Path)))
								return;

							if (change.ChangeType == FileTailingChangeType.StartTailing)
							{
								string[] columnsForFile = GetColumnsForFile(change.Path, settings);

								IObservable<LogRecord> fileChanges = GetFileChanges(
									change.Path,
									settings.Encoding,
									columnsForFile,
									settings.DateTimeColumnIndex,
									logFileBookmarkRepository);

								// Putting a thin wrapper around the 'fileChanges' observable so we can immediately dispose the subscription for individual files
								// and free up resources associated with it. Otherwise, they may not get freed until program shutdown.
								IObservable<LogRecord> wrappedFileChanges = Observable.Create<LogRecord>(fileChangesObserver =>
									{
										IDisposable fileSubscription = fileChanges.Subscribe(fileChangesObserver);
										lock (subscriptionsLock)
										{
											fileTailerSubscriptions.Add(change.Path, fileSubscription);
										}
										return () => { };
									});
								observer.OnNext(wrappedFileChanges);
							}
							else
							{
								// A stop notification for a path that has no registered tailer (for example one
								// that was never subscribed) is ignored rather than throwing KeyNotFoundException
								// out of the notification callback.
								IDisposable stoppedSubscription;
								lock (subscriptionsLock)
								{
									if (fileTailerSubscriptions.TryGetValue(change.Path, out stoppedSubscription))
										fileTailerSubscriptions.Remove(change.Path);
								}

								if (stoppedSubscription != null)
									stoppedSubscription.Dispose();
							}
						},
						directoryError =>
						{
							// The directory watcher failed. Files that are already being tailed keep running,
							// so report the failure as a non-fatal error instead of letting Rx rethrow it on
							// the thread that raised it.
							// NOTE(review): after this no new files are discovered - confirm that is acceptable.
							SyncedExceptionsSubject.OnNext(directoryError);
						});

				var stopWatchingFileChanges = Disposable.Create(() =>
				{
					// Snapshot under the lock, dispose outside it.
					List<IDisposable> remainingSubscriptions;
					lock (subscriptionsLock)
					{
						remainingSubscriptions = fileTailerSubscriptions.Values.ToList();
						fileTailerSubscriptions.Clear();
					}

					foreach (IDisposable fileChangesSubscription in remainingSubscriptions)
						fileChangesSubscription.Dispose();
				});

				return new CompositeDisposable(directoryChangesSubscription, stopWatchingFileChanges);
			});
		}

		/// <summary>
		/// Emits start/stop tailing notifications for the files of a directory. Notifications come from a
		/// FileSystemWatcher and from a polling loop that rescans the directory every
		/// <c>logDirectoryPollTimeSpan</c>; a set of tracked paths keeps the two sources from reporting
		/// the same file twice.
		/// </summary>
		/// <param name="directoryPath">Directory to watch.</param>
		/// <param name="directoryFilter">File name filter; null means "*.*".</param>
		/// <returns>
		/// A cold observable. Every subscription gets its own watcher, tracked-path set and polling task,
		/// all of which are released when the subscription is disposed.
		/// </returns>
		private IObservable<FileTailingChange> GetDirectoryChanges(string directoryPath, string directoryFilter)
		{
			string filter = directoryFilter ?? "*.*";

			return Observable.Create<FileTailingChange>(observer =>
				{
					// Created per subscription (not once per call) so the watcher can be disposed with the
					// subscription and a later subscription does not inherit stale tracking state.
					var watcher = new FileSystemWatcher(directoryPath, filter)
						{
							// For some reason you need to specify this filter for delete notifications to work...
							NotifyFilter = NotifyFilters.FileName
						};
					watcher.Error += (sender, args) =>
						{
							var exception = args.GetException();
							SyncedExceptionsSubject.OnNext(new Exception("Error from FileSystemWatcher: " + exception.Message, exception));
						};

					var trackedPaths = new ConcurrentDictionary<string, bool>();
					var fswLock = new object();

					// Watcher events and the polling loop run on different threads; serialize what the subscriber sees.
					var syncedObserver = Observer.Synchronize(observer);

					IObservable<FileTailingChange> fileSystemWatcherChanges = GetFileSystemWatcherChanges(watcher)
						.Where(x =>
							{
								lock (fswLock)
								{
									// Let a start through only for a path that is not tracked yet, and a stop
									// only for a path that is.
									bool ignored;
									return x.ChangeType == FileTailingChangeType.StartTailing
										       ? trackedPaths.TryAdd(x.Path, true)
										       : trackedPaths.TryRemove(x.Path, out ignored);
								}
							});

					// Attach the event handlers before the watcher starts raising events so none are missed.
					var fileWatcherSubscription = fileSystemWatcherChanges.Subscribe(syncedObserver);
					watcher.EnableRaisingEvents = true;

					var cts = new CancellationTokenSource();
					Task.Factory.StartNew(() =>
						{
							do
							{
								var files = Directory.EnumerateFiles(directoryPath, filter);

								lock (fswLock)
								{
									foreach (string file in files)
									{
										if (trackedPaths.TryAdd(file, true))
											syncedObserver.OnNext(new FileTailingChange(file, FileTailingChangeType.StartTailing));
									}
								}

								// Sleeps for the poll interval, or returns early once cancellation is requested.
								cts.Token.WaitHandle.WaitOne(logDirectoryPollTimeSpan);
							}
							while (!cts.IsCancellationRequested);
						},
						cts.Token,
						// The loop lives as long as the subscription, so keep it off the thread pool.
						TaskCreationOptions.LongRunning,
						TaskScheduler.Default)
						.ContinueWith(
							// Go through the synchronized observer so the error cannot interleave with an OnNext.
							t => syncedObserver.OnError(t.Exception),
							TaskContinuationOptions.OnlyOnFaulted | TaskContinuationOptions.ExecuteSynchronously);

					var stopFswDisposable = Disposable.Create(() => watcher.EnableRaisingEvents = false);
					var stopTaskDisposable = Disposable.Create(cts.Cancel);

					// Disposed in this order: stop events, detach handlers, stop polling, release the watcher.
					return new CompositeDisposable(stopFswDisposable, fileWatcherSubscription, stopTaskDisposable, watcher);
				});
		}

		/// <summary>
		/// Translates the Created, Deleted and Renamed events of a watcher into tailing changes:
		/// created -> start, deleted -> stop, renamed -> stop for the old path followed by start for the new one.
		/// </summary>
		/// <param name="watcher">Watcher whose events are observed; handlers are attached on subscription and detached on disposal.</param>
		private static IObservable<FileTailingChange> GetFileSystemWatcherChanges(FileSystemWatcher watcher)
		{
			var creations = Observable.FromEventPattern<FileSystemEventHandler, FileSystemEventArgs>(
				add => watcher.Created += add,
				remove => watcher.Created -= remove);

			// TODO: We won't get delete events for log files we have open. Noticed that if you delete file in Win Explorer and then refreshed the
			// directory, the file reappeared. See: http://superuser.com/questions/105786/windows-7-files-reappear-after-deletion
			// Will probably need to periodically close file streams and try to reopen (hopefully file is free to be deleted then)
			var deletions = Observable.FromEventPattern<FileSystemEventHandler, FileSystemEventArgs>(
				add => watcher.Deleted += add,
				remove => watcher.Deleted -= remove);

			var renames = Observable.FromEventPattern<RenamedEventHandler, RenamedEventArgs>(
				add => watcher.Renamed += add,
				remove => watcher.Renamed -= remove);

			// Every event becomes a small batch of changes, so that the two changes of a rename travel
			// through the merge as one unit and are only flattened afterwards.
			IObservable<FileTailingChange[]> startBatches = creations.Select(evt => new[]
				{
					new FileTailingChange(evt.EventArgs.FullPath, FileTailingChangeType.StartTailing)
				});

			IObservable<FileTailingChange[]> stopBatches = deletions.Select(evt => new[]
				{
					new FileTailingChange(evt.EventArgs.FullPath, FileTailingChangeType.StopTailing)
				});

			IObservable<FileTailingChange[]> renameBatches = renames.Select(evt => new[]
				{
					new FileTailingChange(evt.EventArgs.OldFullPath, FileTailingChangeType.StopTailing),
					new FileTailingChange(evt.EventArgs.FullPath, FileTailingChangeType.StartTailing)
				});

			IObservable<FileTailingChange[]> allBatches = Observable.Merge(startBatches, stopBatches, renameBatches);
			return allBatches.SelectMany(batch => batch);
		}

		/// <summary>
		/// Blocks the calling thread until <paramref name="filePath"/> exists or cancellation is requested,
		/// checking for the file once per second.
		/// </summary>
		/// <param name="filePath">File to wait for.</param>
		/// <param name="cancellationTokenSource">Source whose cancellation ends the wait early.</param>
		private void WaitUntilFileCreated(string filePath, CancellationTokenSource cancellationTokenSource)
		{
			// Waiting on the token's own wait handle gives the one-second poll interval and an immediate
			// wake-up on cancellation, without allocating an event and a token registration that would
			// otherwise pile up on the long-lived token source every time this method is called.
			var cancellationToken = cancellationTokenSource.Token;

			// Note: FileSystemWatcher just doesn't work which is why it's not used here
			while (!cancellationToken.WaitHandle.WaitOne(TimeSpan.FromSeconds(1)))
			{
				if (File.Exists(filePath))
					return;
			}
		}

		/// <summary>
		/// Opens the file and keeps reading records from it until cancellation is requested, sleeping for
		/// <c>filePollTimeSpan</c> whenever there is nothing new to read. Blocks the calling thread.
		/// </summary>
		/// <param name="filePath">File to open (read-only, shared for read/write/delete).</param>
		/// <param name="encoding">Text encoding of the file.</param>
		/// <param name="possiblyNullColumnNames">Column names attached to each record, or null.</param>
		/// <param name="dateTimeColumnIndex">Zero-based index of the date/time column.</param>
		/// <param name="observer">Receives every record that passes the bookmark date filter.</param>
		/// <param name="cancellationTokenSource">Cancelling it ends the method.</param>
		/// <param name="lastKnownPosition">Optional bookmark; records dated before it are not emitted.</param>
		private void TailFile(
			string filePath,
			Encoding encoding,
			string[] possiblyNullColumnNames,
			int dateTimeColumnIndex,
			IObserver<LogRecord> observer,
			CancellationTokenSource cancellationTokenSource,
			LogFileBookmark lastKnownPosition = null)
		{
			DateTime oldestAllowed = lastKnownPosition == null
				? DateTime.MinValue
				: lastKnownPosition.LogDateTime;

			// Note: Deliberate use of '>=' condition below because date time format used in log file may not have enough
			// resolution for high frequency logs. Seeking to last position should give you exact starting point anyway.
			// Filtering by date is just an additional failsafe to prevent outputting tons of old logs again
			Action<LogRecord> emitIfNotTooOld = record =>
				{
					if (record.LogDateTime >= oldestAllowed)
						observer.OnNext(record);
				};

			using (var logStream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite | FileShare.Delete))
			{
				long previousPosition = logStream.Position;

				while (!cancellationTokenSource.IsCancellationRequested)
				{
					// Drain: keep reading while the file length differs from the last position we reached.
					while (!cancellationTokenSource.IsCancellationRequested && logStream.Length != previousPosition)
					{
						ReadNext(filePath, logStream, encoding, possiblyNullColumnNames, dateTimeColumnIndex, emitIfNotTooOld);

						long currentPosition = logStream.Position;

						// No progress was made, so stop draining and wait for the next poll.
						if (currentPosition == previousPosition)
							break;

						previousPosition = currentPosition;
					}

					if (cancellationTokenSource.IsCancellationRequested)
						return;

					Thread.Sleep(filePollTimeSpan);
				}
			}
		}

		// Parses records from 'stream', starting at its current position, and hands each one to 'action'.
		//
		// If parsing throws, the first exception is published to SyncedExceptionsSubject and parsing is retried
		// from the position the stream had on entry, skipping one additional line on every retry, until either a
		// pass completes or the end of the stream is reached while skipping.
		//
		//   filePath                - stored in every LogRecord that is produced
		//   stream                  - source stream; it is not closed here (the CharStream is created with leaveOpen = true)
		//   encoding                - text encoding used to decode the stream
		//   possiblyNullColumnNames - column names passed on to every LogRecord; may be null
		//   dateTimeColumnIndex     - index of the field that is parsed as the record's date/time
		//   action                  - called for every record, while parsingLock is held
		//
		// NOTE(review): if ParseCharStream yields records lazily, records handed to 'action' before a failure
		// would be handed to it again by the retry, since the retry restarts from the original position - confirm.
		private void ReadNext(string filePath, Stream stream, Encoding encoding, string[] possiblyNullColumnNames, int dateTimeColumnIndex, Action<LogRecord> action)
		{
			/*
			 * TODO:
			 * 
			 * - What about if file is archived off in middle of trying to recover from exception? Will that really screw things up
			 * - Maybe record stream end position on entering method and ensure we never read past that somehow?
			 * 
			 */

			// Reset stream position if file is truncated (or larger file is overwritten with smaller file)
			if (stream.Position > stream.Length)
				stream.Position = 0;

			// Every retry rewinds to this position before skipping lines.
			var originalStreamPosition = stream.Position;
			// Set on the first failure only, so only the first failure of a call is published.
			Exception lastException = null;

			bool leaveOpen = true;
			var charStream = CreateCharStream(stream, leaveOpen, encoding);

			// Number of lines to skip, counted from originalStreamPosition, before the next parse attempt.
			int skippedLines = 0;

			try
			{
				do
				{
					try
					{
						// The field delimiter is hard-coded to '|' (here and in CreateCharStream).
						var parser = new CsvParser.CsvParser('|');
						
						// Big-ass lock. Necessary to prevent temporary memory explosion on startup if there are lots of existing logs to be read.
						// Clearly there must be a better way, but has to do for now. Also protects access to logsReadSinceLastGarbageCollect and hence calls to GC.Collect
						lock (parsingLock)
						{
							// Rows whose first field or date/time field is blank are dropped.
							// DateTime.Parse is called without a format provider, so the current culture decides how dates are read.
							var nextRecords = parser.ParseCharStream(charStream)
								.Where(fields => fields.Any() && !String.IsNullOrWhiteSpace(fields[0])) // <<< TODO: Can remove this when parser fixed
								.Where(fields => !String.IsNullOrWhiteSpace(fields[dateTimeColumnIndex]))
								.Select(fields => new LogRecord(filePath, DateTime.Parse(fields[dateTimeColumnIndex]), fields, possiblyNullColumnNames));

							foreach (var nextRecord in nextRecords)
							{
								action(nextRecord);
								++logsReadSinceLastGarbageCollect;
							}

							// Optional forced collection once enough records have been read (disabled when the threshold is null).
							if (forceMemoryCollectionThreshold.HasValue && logsReadSinceLastGarbageCollect >= forceMemoryCollectionThreshold.Value)
							{
								GC.Collect();
								logsReadSinceLastGarbageCollect = 0;
							}
						}

						// The pass completed without throwing: leave the retry loop.
						break;
					}
					catch (Exception exception)
					{
						// Publish only the first failure of this call; later failures during the retries are not reported.
						if (lastException == null)
						{
							lastException = exception;
							SyncedExceptionsSubject.OnNext(lastException);
						}

						// Reset everything and...
						stream.Position = originalStreamPosition;
						charStream.Dispose();
						charStream = CreateCharStream(stream, leaveOpen, encoding);

						try
						{
							// ... try again from next line down
							++skippedLines;
							for (int i = 0; i < skippedLines; i++)
								charStream.SkipRestOfLine(skipNewline: true);

							// string whereAmI = charStream.PeekString(50);

							// Nothing left after the skipped lines: give up on this call.
							if (charStream.IsEndOfStream)
								return;
						}
						catch (Exception)
						{
							// TODO: Is this recoverable?

							throw;
						}
					}
				}
				while (true);
			}
			finally
			{
				// Always release the current CharStream, whichever way the method exits.
				charStream.DisposeIfNotNull();
			}
		}

		/// <summary>
		/// Wraps a stream in an FParsec CharStream whose user state is a CSV parser state for the '|' delimiter.
		/// </summary>
		/// <param name="stream">Stream to read characters from.</param>
		/// <param name="leaveOpen">Passed straight to the CharStream constructor.</param>
		/// <param name="encoding">Text encoding used to decode the stream.</param>
		private CharStream<CsvParserModule.CsvParserState> CreateCharStream(Stream stream, bool leaveOpen, Encoding encoding)
		{
			var charStream = new CharStream<CsvParserModule.CsvParserState>(stream, leaveOpen, encoding);
			charStream.UserState = new CsvParserModule.CsvParserState('|');
			return charStream;
		}

		// What the directory watcher asks the tailer to do with a file.
		private enum FileTailingChangeType
		{
			// Begin tailing the file (reported for created files, files found by polling, and the new path of a rename).
			StartTailing,
			// Stop tailing the file (reported for deleted files and the old path of a rename).
			StopTailing
		}

		/// <summary>
		/// A request to start or stop tailing one file. Instances cannot be modified from outside after construction.
		/// </summary>
		private class FileTailingChange
		{
			public FileTailingChange(string path, FileTailingChangeType changeType)
			{
				Path = path;
				ChangeType = changeType;
			}

			/// <summary>Path of the file the change applies to.</summary>
			public string Path { get; private set; }

			/// <summary>Whether tailing of the file should start or stop.</summary>
			public FileTailingChangeType ChangeType { get; private set; }
		}

		// Do-nothing bookmark repository, used by the Tail overload that takes no repository:
		// it never has a bookmark for any file and discards every update.
		private class NullLogFileBookmarkRepository : ILogFileBookmarkRepository
		{
			// Always reports that no bookmark is stored for the file.
			public LogFileBookmark Get(string filePath)
			{
				return null;
			}

			// Intentionally empty: bookmarks are not persisted.
			public void AddOrUpdate(LogFileBookmark bookmark)
			{
			}
		}
	}
}
// Back to Top