PageRenderTime 59ms CodeModel.GetById 32ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/gattica/engine.rb

https://github.com/deviantech/gattica
Ruby | 283 lines | 152 code | 34 blank | 97 comment | 27 complexity | 1f9f528d156c2fb5f93b92f6d2d95332 MD5 | raw file
  module Gattica
    # Talks to the Google Analytics data feeds over HTTP: authenticates,
    # lists accounts and segments, and runs report queries.
    class Engine
      attr_reader :user, :token
      attr_accessor :profile_id

      # Initialize Gattica using username/password or token.
      #
      # == Options:
      # To change the defaults see link:settings.rb
      # +:debug+::      Send debug info to the logger (default is false)
      # +:email+::      Your email/login for Google Analytics
      # +:headers+::    Add additional HTTP headers (default is {} )
      # +:logger+::     Logger to use (default is STDOUT)
      # +:password+::   Your password for Google Analytics
      # +:profile_id+:: Use this Google Analytics profile_id (default is nil)
      # +:timeout+::    Set Net:HTTP timeout in seconds (default is 300)
      # +:token+::      Use an authentication token you received before
      #
      # Raises GatticaError::NoLoginOrToken when neither a usable token nor an
      # email/password pair is supplied.
      def initialize(options={})
        @options = Settings::DEFAULT_OPTIONS.merge(options)
        handle_init_options(@options)
        create_http_connection
        check_init_auth_requirements
        # TODO: check that the user has access to the specified profile and show an error here rather than wait for Google to respond with a message
      end

      # Returns the list of accounts the user has access to. A user may have
      # multiple accounts on Google Analytics and each account may have multiple
      # profiles. You need the profile_id in order to get info from GA. If you
      # don't know the profile_id then use this method to get a list of all them.
      # Then set the profile_id of your instance and you can make regular calls
      # from then on.
      #
      #   ga = Gattica.new({:email => 'johndoe@google.com', :password => 'password'})
      #   ga.accounts
      #   # you parse through the accounts to find the profile_id you need
      #   ga.profile_id = 12345678
      #   # now you can perform a regular search, see Gattica::Engine#get
      #
      # If you pass in a profile id when you instantiate Gattica then you won't need to
      # get the accounts and find a profile_id - you apparently already know it!
      #
      # The result is memoized on the instance after the first call.
      #
      # See Gattica::Engine#get to see how to get some data.
      def accounts
        @user_accounts ||= Hpricot(request_default_account_feed).search(:entry).collect do |entry|
          Account.new(entry)
        end
      end

      # Returns the list of segments available to the authenticated user.
      # The result is memoized on the instance after the first call.
      #
      # == Usage
      #   ga = Gattica.new({:email => 'johndoe@google.com', :password => 'password'})
      #   ga.segments                       # Look up segment id
      #   my_gaid = 'gaid::-5'              # Non-paid Search Traffic
      #   ga.profile_id = 12345678          # Set our profile ID
      #
      #   ga.get({ :start_date => '2008-01-01',
      #            :end_date => '2008-02-01',
      #            :dimensions => 'month',
      #            :metrics => 'views',
      #            :segment => my_gaid })
      def segments
        @user_segments ||= Hpricot(request_default_account_feed).search("dxp:segment").collect do |segment|
          Segment.new(segment)
        end
      end

      # This is the method that performs the actual request to get data.
      #
      # == Usage
      #
      #   gs = Gattica.new({:email => 'johndoe@google.com', :password => 'password', :profile_id => 123456})
      #   gs.get({ :start_date => '2008-01-01',
      #            :end_date => '2008-02-01',
      #            :dimensions => 'browser',
      #            :metrics => 'pageviews',
      #            :sort => 'pageviews',
      #            :filters => ['browser == Firefox']})
      #
      # == Input
      #
      # When calling +get+ you'll pass in a hash of options. For a description of what these mean to
      # Google Analytics, see http://code.google.com/apis/analytics/docs
      #
      # Required values are:
      #
      # * +start_date+ => Beginning of the date range to search within
      # * +end_date+ => End of the date range to search within
      #
      # Optional values are (each accepts either a single value or an array):
      #
      # * +dimensions+ => an array of GA dimensions (without the ga: prefix)
      # * +metrics+ => an array of GA metrics (without the ga: prefix)
      # * +filters+ => an array of GA dimensions/metrics you want to filter by (without the ga: prefix)
      # * +sort+ => an array of GA dimensions/metrics you want to sort by (without the ga: prefix)
      #
      # +start_index+, +segment+ and +max_results+ are passed through to the
      # query string when present.
      #
      # == Exceptions
      #
      # If a user doesn't have access to the +profile_id+ you specified, you'll receive an error.
      # Likewise, if you attempt to access a dimension or metric that doesn't exist, you'll get an
      # error back from Google Analytics telling you so.
      def get(args={})
        args = validate_and_clean(Settings::DEFAULT_ARGS.merge(args))
        query_string = build_query_string(args, @profile_id)
        @logger.debug(query_string) if @debug && @logger
        data = do_http_get("/analytics/feeds/data?#{query_string}")
        DataSet.new(Hpricot.XML(data))
      end

      # Since google wants the token to appear in any HTTP call's header, we have to set that header
      # again any time @token is changed so we provide our own writer (note that you need to set
      # @token with self.token= instead of @token=)
      def token=(token)
        @token = token
        set_http_headers
      end

      ######################################################################
      private

      # Gets the default account feed from Google (fetched once, then cached).
      def request_default_account_feed
        @default_account_feed ||= do_http_get('/analytics/feeds/accounts/default')
      end

      # Does the work of making HTTP calls and then going through a suite of tests on the response to make
      # sure it's valid and not an error. Returns the response body on a 200.
      #
      # Raises GatticaError::AnalyticsError on 400, GatticaError::InvalidToken on
      # 401 and GatticaError::UnknownAnalyticsError on any other non-200 status.
      def do_http_get(query_string)
        # Current Net::HTTP#get returns only the response object; very old
        # versions returned a [response, body] pair. Destructuring keeps the
        # response in both cases, and the body is always read from it.
        response, _legacy_body = @http.get(query_string, @headers)

        case response.code
        when '200'
          response.body
        when '400'
          raise GatticaError::AnalyticsError, "#{response.body} (status code: #{response.code})"
        when '401'
          raise GatticaError::InvalidToken, "Your authorization token is invalid or has expired (status code: #{response.code})"
        else # some other unknown error
          raise GatticaError::UnknownAnalyticsError, "#{response.body} (status code: #{response.code})"
        end
      end

      # Sets up the HTTP headers that Google expects (this is called any time @token is set either by Gattica
      # or manually by the user since the header must include the token)
      def set_http_headers
        @headers['Authorization'] = "GoogleLogin auth=#{@token}"
        @headers['GData-Version'] = '2'
      end

      # Creates a valid query string for GA from the cleaned +args+ and the
      # +profile+ id. List-valued args may be arrays, single values or nil.
      #
      # Raises GatticaError::InvalidFilter when a filter cannot be split into
      # name, operator and expression.
      def build_query_string(args, profile)
        params = [
          "ids=ga:#{profile}",
          "start-date=#{args[:start_date]}",
          "end-date=#{args[:end_date]}"
        ]

        start_index = args[:start_index].to_i
        params << "start-index=#{start_index}" if start_index > 0

        dimensions = Array(args[:dimensions])
        params << "dimensions=#{ga_prefixed_list(dimensions)}" unless dimensions.empty?

        metrics = Array(args[:metrics])
        params << "metrics=#{ga_prefixed_list(metrics)}" unless metrics.empty?

        sorts = Array(args[:sort])
        params << "sort=#{sorts.collect { |field| ga_sort_field(field) }.join(',')}" unless sorts.empty?

        params << "segment=#{args[:segment]}" unless args[:segment].nil?
        params << "max-results=#{args[:max_results]}" unless args[:max_results].nil?

        # filters are a little more complicated because they can have all kinds of modifiers
        filters = Array(args[:filters])
        params << "filters=#{filters.collect { |filter| ga_filter(filter) }.join(';')}" unless filters.empty?

        params.join('&')
      end

      # Prefixes every name with "ga:" and joins them with commas.
      def ga_prefixed_list(names)
        names.collect { |name| "ga:#{name}" }.join(',')
      end

      # Prefixes a sort field with "ga:"; if the first character is a dash
      # (descending sort) the dash is moved in front of the prefix.
      def ga_sort_field(field)
        field = field.to_s
        field[0..0] == '-' ? "-ga:#{field[1..-1]}" : "ga:#{field}"
      end

      # Turns a filter such as 'browser == Firefox' into its escaped query form.
      # TODO: update so that in regular expression filters (=~ and !~), any initial special characters in the regular expression aren't also picked up as part of the operator (doesn't cause a problem, but just feels dirty)
      def ga_filter(filter)
        name, operator, expression = filter.to_s.match(/^(\w*)\s*([=!<>~@]*)\s*(.*)$/).captures

        # make sure they all contain something
        if name.empty? || operator.empty? || expression.empty?
          raise GatticaError::InvalidFilter, "The filter '#{filter}' is invalid. Filters should look like 'browser == Firefox' or 'browser==Firefox'"
        end

        "ga:#{name}#{CGI::escape(operator)}#{CGI::escape(expression)}"
      end

      # Validates that the args passed to +get+ are valid and returns a cleaned
      # copy in which :dimensions, :metrics, :sort and :filters are always
      # arrays of strings (a single value is wrapped, nil becomes []).
      #
      # Raises GatticaError::MissingStartDate, MissingEndDate, TooManyDimensions,
      # TooManyMetrics or InvalidSort.
      def validate_and_clean(args)
        raise GatticaError::MissingStartDate, ':start_date is required' if args[:start_date].to_s.empty?
        raise GatticaError::MissingEndDate, ':end_date is required' if args[:end_date].to_s.empty?

        cleaned = args.dup
        [:dimensions, :metrics, :sort, :filters].each do |key|
          cleaned[key] = Array(args[key]).collect { |value| value.to_s }
        end

        raise GatticaError::TooManyDimensions, 'You can only have a maximum of 7 dimensions' if cleaned[:dimensions].length > 7
        raise GatticaError::TooManyMetrics, 'You can only have a maximum of 10 metrics' if cleaned[:metrics].length > 10

        possible = cleaned[:dimensions] + cleaned[:metrics]

        # make sure that the user is only trying to sort fields that they've previously included with dimensions and metrics
        missing = cleaned[:sort].reject do |field|
          possible.include?(field.sub(/\A-/, '')) # ignore a leading minus (descending sort)
        end
        unless missing.empty?
          raise GatticaError::InvalidSort, "You are trying to sort by fields that are not in the available dimensions or metrics: #{missing.join(', ')}"
        end

        # make sure that the user is only trying to filter fields that are in dimensions or metrics
        missing = cleaned[:filters].reject do |filter|
          possible.include?(filter[/\A\w*/]) # get the name of the filter and compare
        end
        unless missing.empty?
          # NOTE(review): InvalidSort is raised for filter problems too; kept as-is
          # because callers may rescue it - confirm before switching to InvalidFilter.
          raise GatticaError::InvalidSort, "You are trying to filter by fields that are not in the available dimensions or metrics: #{missing.join(', ')}"
        end

        cleaned
      end

      # Builds the Net::HTTP connection used for every request, honouring the
      # SSL settings plus the :debug and :timeout options.
      def create_http_connection
        port = Settings::USE_SSL ? Settings::SSL_PORT : Settings::NON_SSL_PORT
        @http = Net::HTTP.new(Settings::SERVER, port)
        @http.use_ssl = Settings::USE_SSL
        @http.set_debug_output($stdout) if @options[:debug]
        @http.read_timeout = @options[:timeout] if @options[:timeout]
      end

      # Sets instance variables from options given during initialization and
      # resets the per-instance caches.
      def handle_init_options(options)
        @logger = options[:logger]
        @debug = options[:debug] # read by #get to decide whether to log the query string
        @profile_id = options[:profile_id]
        @user_accounts = nil # filled in later if the user ever calls Gattica::Engine#accounts
        @user_segments = nil # filled in later if the user ever calls Gattica::Engine#segments
        @default_account_feed = nil
        # headers used for any HTTP requests (Google requires a special 'Authorization' header which is set any time @token is set)
        @headers = {}.merge(options[:headers] || {})
      end

      # If the authorization is a email and password then create User objects
      # or if it's a previous token, use that. Else, raise exception.
      def check_init_auth_requirements
        if @options[:token].to_s.length > 200 # Not sure actual required length, but mine's 267
          self.token = @options[:token]
        elsif @options[:email] && @options[:password]
          @user = User.new(@options[:email], @options[:password])
          @auth = Auth.new(@http, user)
          self.token = @auth.tokens[:auth]
        else
          raise GatticaError::NoLoginOrToken, 'An email and password or an authentication token is required to initialize Gattica.'
        end
      end
    end
  end