PageRenderTime 46ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 1ms

/CsvLogTailer/CsvLogTailer.cs

https://bitbucket.org/emertechie/csvlogtailer
C# | 514 lines | 413 code | 75 blank | 26 comment | 40 complexity | 42265bc2fe5c056afd0d5641e4049487 MD5 | raw file
  1. using System;
  2. using System.Collections.Concurrent;
  3. using System.Collections.Generic;
  4. using System.Diagnostics;
  5. using System.IO;
  6. using System.Linq;
  7. using System.Reactive;
  8. using System.Reactive.Disposables;
  9. using System.Reactive.Linq;
  10. using System.Reactive.Subjects;
  11. using System.Text;
  12. using System.Threading;
  13. using System.Threading.Tasks;
  14. using CsvLogTailing.Bookmarks;
  15. using FParsec;
  16. namespace CsvLogTailing
  17. {
  18. public class CsvLogTailer
  19. {
  // Interval between re-enumerations of a tailed directory (used by the polling loop in GetDirectoryChanges).
  private readonly TimeSpan logDirectoryPollTimeSpan = TimeSpan.FromSeconds(30);

  // Pause between checks for new content while tailing a single file (used by TailFile).
  private readonly TimeSpan filePollTimeSpan = TimeSpan.FromSeconds(0.5);

  // Underlying subject behind the Exceptions property.
  private readonly Subject<Exception> exceptionsSubject;

  // Synchronized wrapper around exceptionsSubject; all non-fatal errors are published through this.
  protected readonly ISubject<Exception, Exception> SyncedExceptionsSubject;

  // Serializes parsing across all tailed files and guards logsReadSinceLastGarbageCollect (see ReadNext).
  private readonly object parsingLock = new object();

  // Number of records handed out since the last forced collection; only touched under parsingLock.
  private int logsReadSinceLastGarbageCollect = 0;

  // When set, ReadNext calls GC.Collect() once at least this many records have been read since the last collection.
  private readonly int? forceMemoryCollectionThreshold;

  /// <summary>
  /// Creates a tailer.
  /// </summary>
  /// <param name="forceMemoryCollectionThreshold">
  /// Optional number of parsed records after which a garbage collection is forced; <c>null</c> disables forced collections.
  /// </param>
  public CsvLogTailer(int? forceMemoryCollectionThreshold = null)
  {
      this.forceMemoryCollectionThreshold = forceMemoryCollectionThreshold;

      exceptionsSubject = new Subject<Exception>();

      // Exceptions can be raised concurrently on different threads, so protect access to subject to ensure sequential notifications:
      SyncedExceptionsSubject = Subject.Synchronize(exceptionsSubject);
  }
  /// <summary>
  /// Stream of non-fatal exceptions raised while tailing (parse errors, bookmark update failures,
  /// FileSystemWatcher errors). These are published via <c>SyncedExceptionsSubject</c>.
  /// </summary>
  public IObservable<Exception> Exceptions
  {
      // Hide the subject's identity so subscribers cannot cast the result back to a Subject
      // and push or terminate the stream themselves.
      get { return exceptionsSubject.AsObservable(); }
  }
  /// <summary>
  /// Tails the file or directory described by <paramref name="settings"/> without persisting bookmarks:
  /// delegates to the two-argument overload with a repository that stores nothing.
  /// </summary>
  public IObservable<LogRecord> Tail(CsvLogTailerSettings settings)
  {
      ILogFileBookmarkRepository noOpBookmarks = new NullLogFileBookmarkRepository();
      return Tail(settings, noOpBookmarks);
  }
  /// <summary>
  /// Tails a single file, or every matching file in a directory, and emits one <c>LogRecord</c> per parsed line.
  /// While records flow, the bookmark repository is updated per file with the timestamp of sampled records.
  /// </summary>
  /// <param name="settings">Describes what to tail and how to parse it.</param>
  /// <param name="logFileBookmarkRepository">Store used to read the starting bookmark and to record progress.</param>
  /// <returns>A cold observable; tailing starts on subscription and stops when the subscription is disposed.</returns>
  /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
  public IObservable<LogRecord> Tail(CsvLogTailerSettings settings, ILogFileBookmarkRepository logFileBookmarkRepository)
  {
      if (settings == null) throw new ArgumentNullException("settings");
      if (logFileBookmarkRepository == null) throw new ArgumentNullException("logFileBookmarkRepository");

      bool isADirectory = Directory.Exists(settings.FileOrDirectoryPath);

      IObservable<LogRecord> logRecordsObs = isADirectory
          ? GetAllFileChangesForDirectory(settings, logFileBookmarkRepository)
              .Merge()
          : GetFileChanges(
              settings.FileOrDirectoryPath,
              settings.Encoding,
              GetColumnsForFile(settings.FileOrDirectoryPath, settings),
              settings.DateTimeColumnIndex,
              logFileBookmarkRepository);

      return Observable.Create<LogRecord>(observer =>
      {
          // Share one underlying subscription between the caller and the bookmark side-channel.
          var sharedObservable = logRecordsObs.Publish();

          var subscription1 = sharedObservable.Subscribe(observer);

          // Per-file bookmark subscriptions are tracked so they are released together with the main subscription.
          var bookmarkSubscriptions = new CompositeDisposable();

          var subscription2 = sharedObservable
              .GroupBy(x => x.FilePath)
              .Subscribe(
                  group =>
                  {
                      IDisposable groupSubscription = group
                          .SampleResponsive(settings.BookmarkRepositoryUpdateFrequency)
                          .Subscribe(
                              logRec =>
                              {
                                  try
                                  {
                                      logFileBookmarkRepository.AddOrUpdate(new LogFileBookmark(logRec.FilePath, logRec.LogDateTime));
                                  }
                                  catch (Exception bookmarkException)
                                  {
                                      SyncedExceptionsSubject.OnNext(bookmarkException);
                                  }
                              },
                              // The source error is already delivered to 'observer' via subscription1. Without a
                              // handler here the onNext-only Subscribe overload would rethrow it on the producer thread.
                              sourceError => { });

                      bookmarkSubscriptions.Add(groupSubscription);
                  },
                  // Same reasoning as above: the caller's observer is the one place source errors are reported.
                  sourceError => { });

          return new CompositeDisposable(sharedObservable.Connect(), subscription1, subscription2, bookmarkSubscriptions);
      });
  }
  /// <summary>
  /// Builds an observable that tails one file on a dedicated long-running task.
  /// A missing file is waited for; up to nine "being used by another process" IO errors are retried after a
  /// short pause; any other failure is reported through <c>OnError</c>.
  /// </summary>
  /// <param name="filePath">File to tail.</param>
  /// <param name="encoding">Text encoding of the file.</param>
  /// <param name="possiblyNullColumnNames">Column names for the file, or <c>null</c> when unknown.</param>
  /// <param name="dateTimeColumnIndex">Index of the column that holds the record timestamp.</param>
  /// <param name="logFileBookmarkRepository">Queried once, up front, for the file's last bookmark.</param>
  /// <exception cref="ArgumentOutOfRangeException">
  /// Column names are known and <paramref name="dateTimeColumnIndex"/> is not a valid index into them.
  /// </exception>
  private IObservable<LogRecord> GetFileChanges(
      string filePath,
      Encoding encoding,
      string[] possiblyNullColumnNames,
      int dateTimeColumnIndex,
      ILogFileBookmarkRepository logFileBookmarkRepository)
  {
      if (possiblyNullColumnNames != null && dateTimeColumnIndex >= possiblyNullColumnNames.Length)
          throw new ArgumentOutOfRangeException("dateTimeColumnIndex", "DateTime column index is greater than number of columns");

      var lastKnownPosition = logFileBookmarkRepository.Get(filePath);

      return Observable.Create<LogRecord>(observer =>
      {
          var cancellationTokenSource = new CancellationTokenSource();
          int sharingExceptions = 0;

          Task fileWatcherTask = Task.Factory.StartNew(() =>
              {
                  do
                  {
                      try
                      {
                          TailFile(filePath, encoding, possiblyNullColumnNames, dateTimeColumnIndex, observer, cancellationTokenSource, lastKnownPosition);
                          sharingExceptions = 0;
                      }
                      catch (FileNotFoundException)
                      {
                          WaitUntilFileCreated(filePath, cancellationTokenSource);
                      }
                      catch (IOException ioex)
                      {
                          if (ioex.Message.Contains("because it is being used by another process") && ++sharingExceptions < 10)
                              Thread.Sleep(250);
                          else
                              throw;
                      }
                      catch (Exception ex)
                      {
                          observer.OnError(ex);
                          throw;
                      }
                  }
                  while (!cancellationTokenSource.IsCancellationRequested);
              },
              TaskCreationOptions.LongRunning);

          // Make sure any Task exception is observed
          fileWatcherTask.ContinueWith(
              t => observer.OnError(new Exception("Error while tailing file. See inner exception for more details", t.Exception)),
              TaskContinuationOptions.OnlyOnFaulted | TaskContinuationOptions.ExecuteSynchronously);

          return Disposable.Create(() =>
          {
              cancellationTokenSource.Cancel();

              // Disposal can be triggered from inside the tailing task itself (e.g. the downstream unsubscribes
              // during OnNext/OnError). Waiting on our own task could only ever run into the timeout.
              if (Task.CurrentId == fileWatcherTask.Id)
                  return;

              try
              {
                  fileWatcherTask.Wait(TimeSpan.FromSeconds(Debugger.IsAttached ? 120 : 2));
              }
              catch (AggregateException)
              {
                  // The task faulted; that failure is reported to the observer by the continuation above,
                  // so it must not additionally escape from Dispose.
              }
          });
      });
  }
  /// <summary>
  /// Resolves the column names for <paramref name="filePath"/> via the settings' column-names provider.
  /// </summary>
  /// <returns>The provider's result, or <c>null</c> when no provider is configured.</returns>
  private static string[] GetColumnsForFile(string filePath, CsvLogTailerSettings settings)
  {
      if (settings.ColumnNamesProvider == null)
      {
          return null;
      }

      return settings.ColumnNamesProvider(filePath);
  }
  /// <summary>
  /// Watches a directory and emits one inner observable of log records per file that should be tailed.
  /// When a file stops being tailed, its inner subscription is disposed straight away.
  /// </summary>
  /// <param name="settings">Supplies the directory, filter, optional file-name exclusion regex and parsing options.</param>
  /// <param name="logFileBookmarkRepository">Passed through to each per-file tailer.</param>
  private IObservable<IObservable<LogRecord>> GetAllFileChangesForDirectory(CsvLogTailerSettings settings, ILogFileBookmarkRepository logFileBookmarkRepository)
  {
      return Observable.Create<IObservable<LogRecord>>(observer =>
      {
          // Active per-file subscriptions, keyed by file path.
          var fileTailerSubscriptions = new Dictionary<string, IDisposable>();

          var directoryChangesSubscription = GetDirectoryChanges(settings.FileOrDirectoryPath, settings.DirectoryFilter)
              .Subscribe(
                  change =>
                  {
                      if (settings.FileNameExcludeRegex != null && settings.FileNameExcludeRegex.IsMatch(Path.GetFileName(change.Path)))
                          return;

                      if (change.ChangeType == FileTailingChangeType.StartTailing)
                      {
                          string[] columnsForFile = GetColumnsForFile(change.Path, settings);

                          IObservable<LogRecord> fileChanges = GetFileChanges(
                              change.Path,
                              settings.Encoding,
                              columnsForFile,
                              settings.DateTimeColumnIndex,
                              logFileBookmarkRepository);

                          // Putting a thin wrapper around the 'fileChanges' observable so we can immediately dispose the subscription for individual files
                          // and free up resources associated with it. Otherwise, they may not get freed until program shutdown.
                          IObservable<LogRecord> wrappedFileChanges = Observable.Create<LogRecord>(fileChangesObserver =>
                          {
                              IDisposable subscription = fileChanges.Subscribe(fileChangesObserver);
                              fileTailerSubscriptions.Add(change.Path, subscription);
                              return () => { };
                          });

                          observer.OnNext(wrappedFileChanges);
                      }
                      else
                      {
                          // A stop notification for a path that has no recorded subscription is ignored
                          // rather than failing with KeyNotFoundException.
                          IDisposable subscription;
                          if (fileTailerSubscriptions.TryGetValue(change.Path, out subscription))
                          {
                              subscription.Dispose();
                              fileTailerSubscriptions.Remove(change.Path);
                          }
                      }
                  },
                  // Surface directory watching/polling failures to the subscriber; an onNext-only
                  // subscription would rethrow them on the notifying thread instead.
                  observer.OnError);

          var stopWatchingFileChanges = Disposable.Create(() =>
          {
              foreach (IDisposable fileChangesSubscription in fileTailerSubscriptions.Values)
                  fileChangesSubscription.Dispose();
          });

          return new CompositeDisposable(directoryChangesSubscription, stopWatchingFileChanges);
      });
  }
  /// <summary>
  /// Emits start/stop tailing notifications for files in <paramref name="directoryPath"/>.
  /// Two sources are combined: FileSystemWatcher events, and a polling task that re-enumerates the directory
  /// every <c>logDirectoryPollTimeSpan</c>. A set of tracked paths de-duplicates the two.
  /// </summary>
  /// <param name="directoryPath">Directory to watch.</param>
  /// <param name="directoryFilter">File-name filter; <c>null</c> means <c>"*.*"</c>.</param>
  private IObservable<FileTailingChange> GetDirectoryChanges(string directoryPath, string directoryFilter)
  {
      string filter = directoryFilter ?? "*.*";

      return Observable.Create<FileTailingChange>(observer =>
      {
          // The watcher and the tracked-path set belong to this subscription, so that the watcher
          // can be disposed when the subscription ends.
          var watcher = new FileSystemWatcher(directoryPath, filter)
          {
              // For some reason you need to specify this filter for delete notifications to work...
              NotifyFilter = NotifyFilters.FileName
          };

          watcher.Error += (sender, args) =>
          {
              var exception = args.GetException();
              SyncedExceptionsSubject.OnNext(new Exception("Error from FileSystemWatcher: " + exception.Message, exception));
          };

          var trackedPaths = new ConcurrentDictionary<string, bool>();

          // fswLock makes "update trackedPaths + decide whether to notify" atomic across watcher events and polling.
          var fswLock = new object();
          var syncedObserver = Observer.Synchronize(observer);

          IObservable<FileTailingChange> fileSystemWatcherChanges = GetFileSystemWatcherChanges(watcher)
              .Where(x =>
              {
                  lock (fswLock)
                  {
                      bool ignored;
                      return x.ChangeType == FileTailingChangeType.StartTailing
                          ? trackedPaths.TryAdd(x.Path, true)
                          : trackedPaths.TryRemove(x.Path, out ignored);
                  }
              });

          watcher.EnableRaisingEvents = true;

          var cts = new CancellationTokenSource();

          Task.Factory.StartNew(() =>
              {
                  do
                  {
                      var files = Directory.EnumerateFiles(directoryPath, filter);

                      lock (fswLock)
                      {
                          foreach (string file in files)
                          {
                              if (trackedPaths.TryAdd(file, true))
                                  syncedObserver.OnNext(new FileTailingChange(file, FileTailingChangeType.StartTailing));
                          }
                      }

                      // Doubles as the poll delay and as an early wake-up on cancellation.
                      cts.Token.WaitHandle.WaitOne(logDirectoryPollTimeSpan);
                  }
                  while (!cts.IsCancellationRequested);
              },
              cts.Token)
              .ContinueWith(
                  // Report through the synchronized observer so the error cannot interleave with an OnNext.
                  t => syncedObserver.OnError(t.Exception),
                  TaskContinuationOptions.OnlyOnFaulted | TaskContinuationOptions.ExecuteSynchronously);

          var stopFswDisposable = Disposable.Create(() => watcher.EnableRaisingEvents = false);
          var fileWatcherSubscription = fileSystemWatcherChanges.Subscribe(syncedObserver);
          var stopTaskDisposable = Disposable.Create(cts.Cancel);

          // The watcher itself goes last: events are stopped and handlers detached before it is disposed.
          return new CompositeDisposable(stopFswDisposable, fileWatcherSubscription, stopTaskDisposable, watcher);
      });
  }
  /// <summary>
  /// Translates the watcher's Created, Deleted and Renamed events into tailing changes.
  /// Each event becomes a small batch (a rename yields a stop for the old path followed by a start for the
  /// new one); the batches from all three events are merged and then flattened.
  /// </summary>
  private static IObservable<FileTailingChange> GetFileSystemWatcherChanges(FileSystemWatcher watcher)
  {
      IObservable<FileTailingChange[]> creations = Observable
          .FromEventPattern<FileSystemEventHandler, FileSystemEventArgs>(
              onCreated => watcher.Created += onCreated,
              onCreated => watcher.Created -= onCreated)
          .Select(evt => new[]
          {
              new FileTailingChange(evt.EventArgs.FullPath, FileTailingChangeType.StartTailing)
          });

      // TODO: We won't get delete events for log files we have open. Noticed that if you delete file in Win Explorer and then refreshed the
      // directory, the file reappeared. See: http://superuser.com/questions/105786/windows-7-files-reappear-after-deletion
      // Will probably need to periodically close file streams and try to reopen (hopefully file is free to be deleted then)
      IObservable<FileTailingChange[]> deletions = Observable
          .FromEventPattern<FileSystemEventHandler, FileSystemEventArgs>(
              onDeleted => watcher.Deleted += onDeleted,
              onDeleted => watcher.Deleted -= onDeleted)
          .Select(evt => new[]
          {
              new FileTailingChange(evt.EventArgs.FullPath, FileTailingChangeType.StopTailing)
          });

      IObservable<FileTailingChange[]> renames = Observable
          .FromEventPattern<RenamedEventHandler, RenamedEventArgs>(
              onRenamed => watcher.Renamed += onRenamed,
              onRenamed => watcher.Renamed -= onRenamed)
          .Select(evt => new[]
          {
              new FileTailingChange(evt.EventArgs.OldFullPath, FileTailingChangeType.StopTailing),
              new FileTailingChange(evt.EventArgs.FullPath, FileTailingChangeType.StartTailing)
          });

      IObservable<FileTailingChange[]> allBatches = Observable.Merge(creations, deletions, renames);
      return allBatches.SelectMany(batch => batch);
  }
  /// <summary>
  /// Blocks until <paramref name="filePath"/> exists or cancellation is requested, checking for the file
  /// once per second.
  /// </summary>
  /// <param name="filePath">File to wait for.</param>
  /// <param name="cancellationTokenSource">Cancelling this source releases the wait.</param>
  private void WaitUntilFileCreated(string filePath, CancellationTokenSource cancellationTokenSource)
  {
      // Both the event and the token registration are released on exit; otherwise every retry would leave
      // another callback registered on the (long-lived) token.
      using (var fileCreated = new ManualResetEventSlim(false))
      using (cancellationTokenSource.Token.Register(fileCreated.Set))
      {
          // Note: FileSystemWatcher just doesn't work which is why it's not used here
          while (!fileCreated.Wait(TimeSpan.FromSeconds(1)))
          {
              if (File.Exists(filePath))
                  fileCreated.Set();
          }
      }
  }
  /// <summary>
  /// Opens <paramref name="filePath"/> for shared reading and keeps pushing newly parsed records to
  /// <paramref name="observer"/> until cancellation is requested. Returns only on cancellation; IO failures
  /// propagate to the caller.
  /// </summary>
  /// <param name="lastKnownPosition">
  /// Optional bookmark. Only its <c>LogDateTime</c> is used here: records older than it are not emitted.
  /// </param>
  private void TailFile(
      string filePath,
      Encoding encoding,
      string[] possiblyNullColumnNames,
      int dateTimeColumnIndex,
      IObserver<LogRecord> observer,
      CancellationTokenSource cancellationTokenSource,
      LogFileBookmark lastKnownPosition = null)
  {
      using (var fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite | FileShare.Delete))
      {
          DateTime earliestLogDateTime = lastKnownPosition == null
              ? DateTime.MinValue
              : lastKnownPosition.LogDateTime;

          // Deliberately inclusive ('>='): the timestamp format in the log may be too coarse to tell
          // high-frequency records apart, so records carrying exactly the bookmarked time are emitted again.
          // NOTE(review): the stream is not positioned from the bookmark in this method, so this date filter
          // is the only visible guard against re-emitting old records - confirm that is intended.
          Action<LogRecord> emitIfNotTooOld = record =>
          {
              if (record.LogDateTime >= earliestLogDateTime)
                  observer.OnNext(record);
          };

          long previousPosition = fileStream.Position;

          while (true)
          {
              // Drain whatever is currently available.
              while (!cancellationTokenSource.IsCancellationRequested && fileStream.Length != previousPosition)
              {
                  ReadNext(filePath, fileStream, encoding, possiblyNullColumnNames, dateTimeColumnIndex, emitIfNotTooOld);

                  long currentPosition = fileStream.Position;
                  if (currentPosition == previousPosition)
                  {
                      // Nothing was consumed; go back to sleeping instead of spinning.
                      break;
                  }

                  previousPosition = currentPosition;
              }

              if (cancellationTokenSource.IsCancellationRequested)
                  return;

              Thread.Sleep(filePollTimeSpan);

              if (cancellationTokenSource.IsCancellationRequested)
                  return;
          }
      }
  }
  /// <summary>
  /// Parses the stream from its current position and passes each resulting record to <paramref name="action"/>.
  /// Rows whose first field or date/time field is blank are skipped. If parsing throws, the first exception is
  /// published on <c>SyncedExceptionsSubject</c>, the stream is rewound to where this call started, and parsing
  /// is retried after skipping one more line than on the previous attempt, until it succeeds or the end of the
  /// stream is reached.
  /// </summary>
  private void ReadNext(string filePath, Stream stream, Encoding encoding, string[] possiblyNullColumnNames, int dateTimeColumnIndex, Action<LogRecord> action)
  {
      /*
       * TODO:
       *
       * - What about if file is archived off in middle of trying to recover from exception? Will that really screw things up
       * - Maybe record stream end position on entering method and ensure we never read past that somehow?
       *
       */

      const bool leaveOpen = true;

      // Reset stream position if file is truncated (or larger file is overwritten with smaller file)
      if (stream.Position > stream.Length)
      {
          stream.Position = 0;
      }

      long startPosition = stream.Position;
      bool firstErrorPublished = false;
      int linesToSkip = 0;

      var charStream = CreateCharStream(stream, leaveOpen, encoding);

      try
      {
          while (true)
          {
              try
              {
                  var parser = new CsvParser.CsvParser('|');

                  // Big-ass lock. Necessary to prevent temporary memory explosion on startup if there are lots of existing logs to be read.
                  // Clearly there must be a better way, but has to do for now. Also protects access to logsReadSinceLastGarbageCollect and hence calls to GC.Collect
                  lock (parsingLock)
                  {
                      var parsedRecords =
                          from fields in parser.ParseCharStream(charStream)
                          where fields.Any() && !String.IsNullOrWhiteSpace(fields[0]) // <<< TODO: Can remove this when parser fixed
                          where !String.IsNullOrWhiteSpace(fields[dateTimeColumnIndex])
                          select new LogRecord(filePath, DateTime.Parse(fields[dateTimeColumnIndex]), fields, possiblyNullColumnNames);

                      foreach (var parsedRecord in parsedRecords)
                      {
                          action(parsedRecord);
                          ++logsReadSinceLastGarbageCollect;
                      }

                      bool collectionDue = forceMemoryCollectionThreshold.HasValue
                          && logsReadSinceLastGarbageCollect >= forceMemoryCollectionThreshold.Value;
                      if (collectionDue)
                      {
                          GC.Collect();
                          logsReadSinceLastGarbageCollect = 0;
                      }
                  }

                  // Parsed through without an exception: done.
                  return;
              }
              catch (Exception exception)
              {
                  // Only the first failure of this call is published; follow-up failures while retrying are not.
                  if (!firstErrorPublished)
                  {
                      firstErrorPublished = true;
                      SyncedExceptionsSubject.OnNext(exception);
                  }

                  // Reset everything and...
                  stream.Position = startPosition;
                  charStream.Dispose();
                  charStream = CreateCharStream(stream, leaveOpen, encoding);

                  // ... try again from next line down
                  // TODO: Is a failure while skipping recoverable? For now it simply propagates to the caller.
                  ++linesToSkip;
                  for (int skipped = 0; skipped < linesToSkip; skipped++)
                  {
                      charStream.SkipRestOfLine(skipNewline: true);
                  }

                  if (charStream.IsEndOfStream)
                  {
                      return;
                  }
              }
          }
      }
      finally
      {
          charStream.DisposeIfNotNull();
      }
  }
  /// <summary>
  /// Wraps <paramref name="stream"/> in a parser character stream whose user state is a fresh
  /// <c>CsvParserState</c> constructed with <c>'|'</c>.
  /// </summary>
  private CharStream<CsvParserModule.CsvParserState> CreateCharStream(Stream stream, bool leaveOpen, Encoding encoding)
  {
      var wrapped = new CharStream<CsvParserModule.CsvParserState>(stream, leaveOpen, encoding);
      wrapped.UserState = new CsvParserModule.CsvParserState('|');
      return wrapped;
  }
  /// <summary>
  /// What should happen to a file reported by directory watching.
  /// </summary>
  private enum FileTailingChangeType
  {
      // The file was discovered (created, renamed to this path, or found by polling): begin tailing it.
      StartTailing = 0,

      // The file went away (deleted, or renamed away from this path): stop tailing it.
      StopTailing = 1
  }
  /// <summary>
  /// Immutable pairing of a file path with the tailing action to take for it.
  /// </summary>
  private class FileTailingChange
  {
      public FileTailingChange(string path, FileTailingChangeType changeType)
      {
          Path = path;
          ChangeType = changeType;
      }

      // Path of the affected file, as given to the constructor.
      public string Path { get; private set; }

      // Whether tailing of the file should start or stop.
      public FileTailingChangeType ChangeType { get; private set; }
  }
  /// <summary>
  /// Bookmark repository that stores nothing; used when the caller does not supply one.
  /// </summary>
  private class NullLogFileBookmarkRepository : ILogFileBookmarkRepository
  {
      // There is never a bookmark for any file.
      public LogFileBookmark Get(string filePath)
      {
          LogFileBookmark noBookmark = null;
          return noBookmark;
      }

      // Updates are accepted and discarded.
      public void AddOrUpdate(LogFileBookmark bookmark)
      {
          // Intentionally empty.
      }
  }
  438. }
  439. }