# worker.py
# Python | 386 lines | 361 code | 8 blank | 17 comment | 1 complexity | 666edbb5ed27c9d641a10967d57d85f6 MD5
# Possible License(s): GPL-3.0
# Download Manager - A utility for queuing and downloading very large files over HTTP.
# Copyright (C) 2009-2011 Ben Russell, br@x-plugins.com
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import urllib
import urllib2
import os
import time
#import sys
import threading
import string
import socket

from worker_fetchfile import Worker_FetchFile

# This is the background work-thread interface that the GUI talks to.
class WorkerPool(threading.Thread):
    """Background worker-thread pool that the GUI talks to.

    Owns a list of Worker_FetchFile transfer objects (``fetch_pool``),
    starts queued transfers up to ``max_concurrent_transfers``, pumps the
    active ones from ``run()``, and exposes aggregate byte/speed/ETA
    statistics for display.
    """

    def __init__(self):
        threading.Thread.__init__(self)

        self.outputFolder = ""

        # Set by stop() to ask the run() service loop to exit.
        self.timeToQuit = threading.Event()
        self.timeToQuit.clear()

        # 10 second timeout on socket-based operations.
        timeout = 10
        socket.setdefaulttimeout(timeout)

        self.fetch_pool = []  # all Worker_FetchFile objects, queued/active/finished
        self.max_concurrent_transfers = 2

        self.total_bytesPerSecond = 0

        self.total_bytesInPool = 0
        self.total_megabytesInPool = 0.0
        self.throttle_target = 89
        self.throttle = 75

        self.worker_block_size = 8192

        self.eta = "..."

        # These are broken down to represent the entire duration;
        # none of them is a sum total.
        self.last_eta_minutes = 0.0
        self.last_eta_hours = 0.0
        self.last_eta_days = 0.0

        # Accumulated minutes required for this transfer; used to build the
        # "total ETA" displayed for progress of the entire package.
        self.eta_minutes_total = 0.0

        self.username = ""
        self.password = ""
        self.download_key = ""
        self.valid_key = False  # set True once a valid-looking TOC has been downloaded
        self.mirror_id = -1
        self.mirror = ""

        self.package_name = ""
        self.package_info = ""

        self.package_urls_raw = []

        self.transfer_active = False

    def log(self, msg):
        """Emit a diagnostic message (currently just prints to stdout)."""
        print(msg)

    def download_package_info(self, package_name="demo"):
        """Download the table of contents for a purchase key.

        ``self.download_key`` may be "demo" or an MD5 hash; keys of any
        other length are rejected outright.  On success this populates
        ``package_name``, ``package_info`` and ``package_urls_raw`` and sets
        ``valid_key``.  (``package_name`` arg is kept for interface
        compatibility; the key/credentials come from instance state.)
        """
        import hashlib

        # Only MD5-hash-length or "demo"-length keys are worth asking about.
        keylength = len(self.download_key)
        if keylength != len(hashlib.md5("foo").hexdigest()) and keylength != len("demo"):
            return

        # TODO: Make this configurable from a global config file for the app.
        base_url = "http://x-aviation.com/RealScenery10x/"

        params = urllib.urlencode({'u': self.username, 'p': self.password,
                                   'k': self.download_key, 'm': self.mirror})

        request_url = "%s?%s" % (base_url, params)

        self.valid_key = False  # Unknown until the server has answered.

        self.log("Requesting TOC: %s" % (request_url))
        try:
            fh = urllib2.urlopen(request_url)
            data = fh.read()
            fh.close()

            # FIXME: Parse the received TOC data and ensure that it looks valid.
            # bug: https://bitbucket.org/xplugins/download-manager/issue/4
            self.valid_key = True
            self.log("TOC Data:\n%s" % (data))
        except Exception:  # was a bare except; keep the best-effort behaviour
            self.log("Failed to get TOC data.")
            return

        lines = data.split("\n")
        if len(lines) <= 1:
            # Guard: the old code fell through here and dereferenced
            # self.package_name_raw, which may never have been set.
            self.log("Error: TOC data too short.")
            return

        self.package_name_raw = lines[0]   # First line is the package summary.
        self.package_urls_raw = lines[1:]  # All other lines are assets to download.

        tokens = self.package_name_raw.split("::")
        if len(tokens) < 2:
            # Guard: a well-formed summary line is "name::info".
            self.log("Error: Malformed package summary line.")
            return
        self.package_name = tokens[0]
        self.package_info = tokens[1]

        if self.package_name == "":
            self.log("Error: Package name is blank.")
            return

    def download_package(self, package_name="demo"):
        """Queue every asset listed in the TOC for download.

        Fetches the TOC first (via download_package_info) if we do not have
        one yet; filtering and collection of the URL list happens there.
        """
        self.transfer_active = True

        if len(self.package_urls_raw) == 0:
            self.download_package_info(package_name=package_name)

        params = urllib.urlencode({'u': self.username, 'p': self.password,
                                   'k': self.download_key, 'm': self.mirror})

        for r_u in self.package_urls_raw:
            if r_u == "":
                continue
            tokens = r_u.split("::")
            if len(tokens) < 2:
                # Guard: a well-formed asset line is "url::size".
                self.log("Skipping malformed TOC entry: %s" % (r_u))
                continue

            fetch_url = tokens[0]
            try:
                manifest_size = int(tokens[1])
            except ValueError:
                self.log("Skipping TOC entry with bad size: %s" % (r_u))
                continue

            filename = fetch_url.split("/")[-1]

            full_output_file_path = "%s%s/%s" % (self.outputFolder, self.package_name, filename)
            package_label = "%s: %s" % (self.package_name, filename)
            tmp_url = "%s?%s" % (fetch_url, params)

            # (A disabled debug filter that limited downloads to small test
            # files used to live here; all files are queued now.)
            self.queue_url(gui_label=package_label, url=tmp_url,
                           outfile=full_output_file_path, manifest_size=manifest_size)

    def calculate_speed(self, interval):
        """Update per-worker transfer-rate and ETA statistics.

        ``interval`` is the seconds elapsed since the previous call; it is an
        argument so the caller (GUI or thread) controls the sample rate.
        """
        self.eta_minutes_total = 0.0  # reset accumulator

        mult = 1.0 / interval

        for worker in self.fetch_pool:
            bytes_delta = worker.bytesExpected - worker.sock_byte_count

            worker.bytesPerSecond = (worker.bytesRecieved - worker.last_bytesRecieved) * mult
            worker.last_bytesRecieved = worker.bytesRecieved
            worker.speed = "%0.2f" % (worker.bytesPerSecond / 1024.0)

            try:
                eta_minutes = (bytes_delta / worker.bytesPerSecond) / 60.0
            except ZeroDivisionError:
                # Stalled worker (no bytes this interval): leave its ETA
                # untouched.  The old bare except also hid real bugs.
                continue

            self.eta_minutes_total += eta_minutes

            # Average each figure with the previous sample to smooth the display.
            if eta_minutes <= 60.0:
                eta_string = "%.2f mins" % ((eta_minutes + self.last_eta_minutes) / 2.0)
                self.last_eta_minutes = eta_minutes
            else:
                eta_hours = eta_minutes / 60.0
                if eta_hours <= 24.0:
                    eta_string = "%.2f hrs" % ((eta_hours + self.last_eta_hours) / 2.0)
                    self.last_eta_hours = eta_hours
                else:
                    eta_days = eta_hours / 24.0
                    eta_string = "%.2f days" % ((eta_days + self.last_eta_days) / 2.0)
                    self.last_eta_days = eta_days

            if worker.failed:
                worker.eta = ""
            else:
                worker.eta = eta_string

    def queue_url(self, url, outfile=None, params=None, post_data=None, gui_label="", manifest_size=-1):
        """Add a download to the pool unless the same URL is already queued."""
        if url == "":
            return

        for w in self.fetch_pool:
            if w.url == url:
                self.log("We already have a thread dealing with that URL...ignored.")
                return

        fetcher = Worker_FetchFile(url=url, outfile=outfile, gui_label=gui_label,
                                   manifest_size=manifest_size, parent=self)
        self.fetch_pool.append(fetcher)

        self.log("QD: %s" % (url))

    def run(self):
        """Service loop.

        Controls start/stop of workers fetching content and services active
        workers; reads the user-facing throttle and adapts block size and
        loop sleep time to control bandwidth use.
        """
        self.active_count = 0  # number of workers currently transferring

        while True:
            # Walk the throttle toward its target, clamping the step to the
            # remaining distance.  (The old fixed +/-2 step overshot and
            # could never settle when current and target had different
            # parity, e.g. 88 vs 89.)
            diff = self.throttle_target - self.throttle
            if diff > 0:
                self.throttle += min(2, diff)
            elif diff < 0:
                self.throttle -= min(2, -diff)

            if self.timeToQuit.is_set():
                break

            started_count = 0                  # transfers started this pass (max one)
            self.total_bytesPerSecond_tmp = 0  # stats accumulator
            self.total_bytesInPool = 0

            # Assess number of active workers.
            self.active_count = 0
            for worker in self.fetch_pool:
                if worker.active:
                    self.active_count += 1

            # Service the pool.
            for worker in self.fetch_pool:
                # Count every worker toward the pool total exactly once.
                # (The old code skipped queued-but-unstarted workers and
                # counted active ones twice.)
                self.total_bytesInPool += worker.bytesExpected

                # Limit how many new transfers start per pass so existing
                # connections get the bulk of the service time.
                if worker.wants_start:
                    if started_count == 0 and self.active_count < self.max_concurrent_transfers:
                        self.log("\nStarting a new transfer: %s" % (worker.url))
                        worker.begin_transfer()
                        self.active_count += 1
                        started_count += 1

                # Service existing transfers: pump received data (written to
                # disk as needed) and pick up the user-adjusted block size.
                if worker.active:
                    worker.do_transfer()
                    worker.block_size = self.worker_block_size
                    self.total_bytesPerSecond_tmp += worker.bytesPerSecond

            self.total_bytesPerSecond = self.total_bytesPerSecond_tmp

            # Throttle-tuned timing: map the throttle percentage onto block
            # size and loop sleep delay.  The sleep delay is tuned against
            # the GUI slider -- don't change one without the other.
            throttle_p = self.throttle / 100.0
            if throttle_p >= 0.9:
                throttle_p = 1.0  # mac slider bug work-around

            inv_throttle_p = 1.0 - throttle_p

            new_block_size = int(8192.0 * throttle_p)
            if new_block_size < 1:
                new_block_size = 1

            sleep_delay = 1.0 * inv_throttle_p
            if sleep_delay < 0.001:
                sleep_delay = 0.001

            self.total_megabytesInPool = self.megabytes(self.total_bytesInPool)
            self.worker_block_size = new_block_size

            time.sleep(sleep_delay)

    # TODO: Scan code to see if this function is still used.
    def clean_pool(self):
        """Deliberate no-op: finished workers are left in the pool so the
        GUI keeps showing their final download state for free."""
        pass

    def stop(self):
        """Ask the run() service loop to exit at its next pass."""
        self.timeToQuit.set()

    # Utility function; could move to a shared module.
    def megabytes(self, value):
        """Convert a byte count to floating-point megabytes."""
        return float(value) / 1048576.0