/branches/0.15.x/src/MDownloader.Hostings/Providers/EasyShare/SiteBrowser.cs
C# | 248 lines | 197 code | 45 blank | 6 comment | 11 complexity | 4183f3b81dfa1ff73cc5cd49af6c73e0 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, CC-BY-SA-3.0
- using System;
- using System.Collections.Generic;
- using System.IO;
- using System.Linq;
- using System.Text;
- using System.Text.RegularExpressions;
- using Html.Engine.Browsing;
- using Html.Engine.Domain;
- using Html.Engine.Domain.Core;
- using Html.Engine.Domain.Forms;
- using MDownloader.Hostings.Common;
- using MDownloader.Hostings.Core;
- using MDownloader.Hostings.Exceptions;
- using MDownloader.Hostings.Providers.EasyShare.Stages;
- using Melon.Commons.Extensions;
- using Html.Engine.Domain.Scripting;
-
- namespace MDownloader.Hostings.Providers.EasyShare
- {
- [HostingProvider]
- public sealed class SiteBrowser : HostingBase<SiteInfo>
- {
#region Constants
// Page the upload flow starts from (see OnUploadData).
private static readonly string MainPage = @"http://www.easy-share.com/";

// Captures, in the "link" group, a double-quoted resource URL of the form
// http://www.easy-share.com/<digits>/<anything but a quote>.
// The dots of the host name are escaped so they match a literal '.' only.
private static readonly Regex GeneratedResourceRegex = new Regex("\"(?<link>http://www\\.easy-share\\.com/[0-9]+/[^\"]+)\"");

// Matches a delete URL of the form http://www.easy-share.com/del/<alphanumerics>.html.
// The dots of the host name and of the ".html" suffix are escaped so they match a literal '.' only.
private static readonly Regex DeleteLinkRegex = new Regex("http://www\\.easy-share\\.com/del/[0-9a-zA-Z]+\\.html");
#endregion
-
#region Overridden Members
/// <summary>
/// Exposes the singleton <see cref="SiteInfo"/> as the hosting information of this provider.
/// </summary>
/// <returns>The value of <c>SiteInfo.Instance</c>.</returns>
public override IHostingInfo GetInfo()
{
    IHostingInfo info = SiteInfo.Instance;

    return info;
}
-
/// <summary>
/// Decides whether the given link points at the easy-share.com host or one of its sub-domains.
/// </summary>
/// <param name="link">Link to inspect; a null or relative link is reported as not handled.</param>
/// <returns>
/// <c>true</c> when the host is exactly "easy-share.com" or ends with ".easy-share.com"
/// (compared ordinally, ignoring case); otherwise <c>false</c>.
/// </returns>
protected override bool OnCanHandle(Uri link)
{
    // Uri.Host is only available for absolute URIs, so anything else cannot belong to this hosting.
    if (link == null || !link.IsAbsoluteUri)
        return false;

    var host = link.Host;

    // Require a label boundary before the domain so that look-alike hosts such as
    // "noteasy-share.com" are rejected.
    return host.Equals("easy-share.com", StringComparison.OrdinalIgnoreCase)
        || host.EndsWith(".easy-share.com", StringComparison.OrdinalIgnoreCase);
}
-
/// <summary>
/// Opens the target link in a freshly configured browser and reads the resource name and size
/// from the element with id "ucontent".
/// </summary>
/// <param name="context">Context used to create the browser.</param>
/// <param name="target">Resource whose link is opened.</param>
/// <param name="specification">Specification passed to the browser configuration.</param>
/// <returns>Metadata built from the extracted name and size.</returns>
/// <exception cref="NotFoundException">Thrown by the removal check when the page has no "tabCaptcha" element.</exception>
public override ResourceMetadata GetResourceMetadata(IDownloadContext context, IRetrievable target, DownloadSpecification specification)
{
    var webBrowser = context.CreateBrowser();
    this.ConfigureBrowser(webBrowser, specification);

    var downloadPage = webBrowser.NavigateTo(target.Link.ToString());
    this.CheckIfRemoved(downloadPage);

    // The name is the first literal whose owner carries the "px18 pb10" class.
    var nameLiteral = downloadPage
        .GetElementById("ucontent")
        .GetDescendants<Literal>()
        .First(literal => literal.GetOwner().ClassName == "px18 pb10");

    // The size is taken from the last literal below the same owner.
    var sizeLiteral = nameLiteral
        .GetOwner()
        .GetDescendants<Literal>()
        .Last();

    var resourceSize = ResourceSizeHelper.FindSizeFromTheEnd(sizeLiteral.Content);
    var resourceName = ResourceNameHelper.RemoveStringReferences(nameLiteral.Content);

    return new ResourceMetadata(resourceName, resourceSize);
}
-
// Patterns used while scraping the download page and the captcha scripts; built once instead of per call.
private static readonly Regex WaitSecondsRegex = new Regex("w\\s*=\\s*'(?<seconds>[0-9]+)'");
private static readonly Regex CaptchaUrlRegex = new Regex("u\\s*=\\s*'(?<url>[^']+)'");
private static readonly Regex RecaptchaKeyRegex = new Regex("Recaptcha\\.create\\s*\\(\\s*\"(?<id>[^\"]+)\"");
private static readonly Regex RecaptchaChallengeRegex = new Regex("challenge\\s*:\\s*'(?<id>[^']*)'");

/// <summary>
/// Downloads the resource behind <paramref name="target"/>: optionally logs in, waits the required
/// time, solves the reCAPTCHA through the context and submits the captcha form.
/// </summary>
/// <param name="context">Context used to create the browser, to wait and to recognize the captcha.</param>
/// <param name="target">Resource whose link is opened.</param>
/// <param name="specification">Specification used to configure the browser and to pick the resource range.</param>
/// <returns>Binary resource data built from the response to the captcha form submission.</returns>
/// <exception cref="NotFoundException">Thrown by the removal check when the page has no "tabCaptcha" element.</exception>
/// <exception cref="BadPasscodeException">Thrown when the response to the captcha submission is not a binary stream.</exception>
protected override ResourceData OnGetResourceData(IDownloadContext context, IRetrievable target, DownloadSpecification specification)
{
    var browser = context.CreateBrowser();
    this.ConfigureBrowser(browser, specification);

    var configuration = SiteInfo.Instance.GetConfiguration();
    var page = browser.NavigateTo(target.Link.ToString());

    // If user wants to use registered account we must log in first.
    if (configuration.Accounts.Preferences == AccountsSectionPart.UsagePreferences.Registered)
    {
        var account = configuration.Accounts.GetAccounts().RandomOrDefault();

        if (account != null)
        {
            var loginStage = new LogInStage(page);

            loginStage.SetCredentials(account.Username, account.Password);
            loginStage.LogIn(browser);
        }
    }

    this.CheckIfRemoved(page);
    this.CheckIfBusy(page);

    context.Wait(TimeSpan.FromSeconds(20));

    // Additional delay announced by a page script as w = '<seconds>'; zero when no script declares one.
    var waitQuery = from script in page.GetScripts()
                    where !String.IsNullOrEmpty(script.Content)
                    let waitMatch = WaitSecondsRegex.Match(script.Content)
                    where waitMatch.Success
                    select Int32.Parse(waitMatch.Groups["seconds"].Value);

    context.Wait(TimeSpan.FromSeconds(waitQuery.FirstOrDefault()));

    var captchaQuery = from script in page.GetScripts()
                       where !String.IsNullOrEmpty(script.Content)
                       let captchaMatch = CaptchaUrlRegex.Match(script.Content)
                       where captchaMatch.Success
                       select captchaMatch.Groups["url"].Value;

    // Resolve the deferred query once; the same address is requested twice below and
    // enumerating the query again would re-scan every script of the page.
    var captchaUrl = captchaQuery.First();

    var cForm = browser.NavigateToAs<Form>(captchaUrl);
    var cFormId = new Uri(cForm.Action.Location).Segments[4].Trim('/');
    var cScript = browser.NavigateToAs<ScriptElement>(captchaUrl);
    var recaptchaKey = RecaptchaKeyRegex.Match(cScript.Content).Groups["id"].Value;

    var challengeUrl = String.Format("http://www.google.com/recaptcha/api/challenge?k={0}&ajax=1&cachestop=0.03379133495216441", recaptchaKey);
    string challenge;
    string imageUrl;

    // Ask the reCAPTCHA service for a challenge id; the captcha image address is derived from it.
    using (var resource = browser.GetResourceData(challengeUrl, ResourceRange.Default))
    {
        var challengeScript = resource.OpenStream().ReadStringToEnd();

        challenge = RecaptchaChallengeRegex.Match(challengeScript).Groups["id"].Value;
        imageUrl = String.Format(@"http://api.recaptcha.net/image?c={0}", challenge);
    }

    byte[] captchaImage;
    string captchaText;

    // Download the captcha image and let the context recognize it.
    using (var resource = browser.GetResourceData(imageUrl, ResourceRange.Default))
    {
        captchaImage = resource.OpenStream().ReadBytesToEnd();
        captchaText = context.Recognize(CaptchaRecognitionMode.Default, captchaImage, new CaptchaDescriptor());
    }

    // NOTE(review): the stream handed to ResourceData comes from a resource that this using block
    // disposes on return - confirm the stream stays readable after the resource is disposed.
    using (var resource = browser.GetResourceData(new CaptchaRequest(cForm.Action.Location, cFormId, challenge, captchaText), this.GetResourceRange(specification)))
    {
        // Anything but a binary stream is treated as a rejected captcha answer.
        if (!resource.IsBinaryStream())
            throw new BadPasscodeException(captchaImage, captchaText, this.GetInfo());

        return new ResourceData(resource.OpenStream(), resource.Size, ResourceDataType.Binary);
    }
}
-
/// <summary>
/// Lists the configured accounts as <see cref="Common.AccountInfo"/> entries holding
/// the id and the user name of each account.
/// </summary>
/// <returns>One entry per configured account.</returns>
protected override Common.AccountInfo[] OnGetAvailableAccounts()
{
    return SiteInfo.Instance
        .GetConfiguration()
        .Accounts
        .GetAccounts()
        .Select(account => new Common.AccountInfo(account.Id, account.Username))
        .ToArray();
}
-
/// <summary>
/// Uploads <paramref name="target"/> through the main page, logging in first when the account
/// selected by the target is configured, and extracts the resource and delete links from the response.
/// </summary>
/// <param name="context">Context used to create the browser (created with a two minute time span).</param>
/// <param name="target">Data to upload together with its name, description, size and account id.</param>
/// <param name="specification">Specification passed to the browser configuration.</param>
/// <returns>Result holding the generated resource link and the delete link.</returns>
protected override UploadResult OnUploadData(IDownloadContext context, IUploadable target, UploadSpecification specification)
{
    var uploader = context.CreateBrowser(TimeSpan.FromMinutes(2));
    this.ConfigureBrowser(uploader, specification);

    var settings = SiteInfo.Instance.GetConfiguration();
    var startPage = uploader.NavigateTo(SiteBrowser.MainPage);
    var owner = settings.Accounts.GetAccounts().FirstOrDefault(candidate => candidate.Id == target.Account);

    // Without a matching account the upload proceeds without logging in.
    if (owner != null)
    {
        var login = new LogInStage(startPage);

        login.SetCredentials(owner.Username, owner.Password);
        login.LogIn(uploader);
    }

    var upload = new UploadStage(startPage);
    var uploadForm = upload.SetData(target.Name, target.Description, target.Size, target.OpenDataStream());

    using (var response = uploader.GetResourceData(uploadForm, ResourceRange.Default))
    {
        var responseText = response.OpenStream().ReadStringToEnd();

        var resourceLink = SiteBrowser.GeneratedResourceRegex.Match(responseText).Groups["link"].Value;
        var deleteLink = SiteBrowser.DeleteLinkRegex.Match(responseText).Value;

        var resourceUri = new Uri(resourceLink);
        var deleteUri = new Uri(deleteLink);

        return new UploadResult(resourceUri, deleteUri);
    }
}
- #endregion
- #region Private Members
/// <summary>
/// Treats a page without the "tabCaptcha" element as a missing resource.
/// </summary>
/// <param name="page">Page to inspect.</param>
/// <exception cref="NotFoundException">Thrown when the page has no element with id "tabCaptcha".</exception>
private void CheckIfRemoved(Page page)
{
    var captchaTab = page.GetElementById("tabCaptcha");

    if (captchaTab != null)
        return;

    throw new NotFoundException();
}
-
/// <summary>
/// Placeholder for the "hosting is busy" check; it currently performs no validation.
/// </summary>
/// <param name="page">Page that would be inspected; unused while the check is disabled.</param>
private void CheckIfBusy(Page page)
{
    // TODO: busy detection is disabled. The former logic threw a PostponeException when the
    // "tabCaptcha" element was missing, passing the number found at the beginning of the first
    // child literal of the "freeTimer" element as the delay when that literal was present.
}
- #endregion
-
/// <summary>
/// Form-encoded POST request carrying the form id together with the reCAPTCHA
/// challenge and response fields.
/// </summary>
private class CaptchaRequest : ISubmittable
{
    private readonly string targetLocation;
    private readonly string formId;
    private readonly string challengeField;
    private readonly string responseField;

    /// <summary>
    /// Stores the values that are later exposed as the request location and parameters.
    /// </summary>
    /// <param name="location">Address the request is sent to.</param>
    /// <param name="id">Value of the "id" parameter.</param>
    /// <param name="challenge">Value of the "recaptcha_challenge_field" parameter.</param>
    /// <param name="response">Value of the "recaptcha_response_field" parameter.</param>
    public CaptchaRequest(string location, string id, string challenge, string response)
    {
        targetLocation = location;
        formId = id;
        challengeField = challenge;
        responseField = response;
    }

    #region IResourceable Members
    /// <summary>Address the request is sent to.</summary>
    public string Location
    {
        get { return targetLocation; }
    }

    /// <summary>The request is always a POST.</summary>
    public string Method
    {
        get { return @"POST"; }
    }

    /// <summary>The request body is always URL-encoded form data.</summary>
    public string Encoding
    {
        get { return @"application/x-www-form-urlencoded"; }
    }

    /// <summary>Builds a fresh array with the id, challenge and response parameters.</summary>
    public ResourceParameter[] Parameters
    {
        get
        {
            var idParameter = new ResourceParameter("id", formId);
            var challengeParameter = new ResourceParameter("recaptcha_challenge_field", challengeField);
            var responseParameter = new ResourceParameter("recaptcha_response_field", responseField);

            return new ResourceParameter[] { idParameter, challengeParameter, responseParameter };
        }
    }

    /// <summary>No additional headers are sent.</summary>
    ResourceParameter[] IResourceable.Headers
    {
        get { return new ResourceParameter[0]; }
    }
    #endregion
}
- }
- }