PageRenderTime 263ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 1ms

/src/pagespeed/rules/avoid_landing_page_redirects.cc

http://page-speed.googlecode.com/
C++ | 312 lines | 207 code | 44 blank | 61 comment | 36 complexity | 9db8c711ffd38528871097de99c53957 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, CC-BY-SA-3.0, LGPL-3.0
  1. // Copyright 2012 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "pagespeed/rules/avoid_landing_page_redirects.h"
  15. #include <algorithm>
  16. #include <map>
  17. #include <set>
  18. #include <string>
  19. #include <vector>
  20. #include "base/logging.h"
  21. #include "googleurl/src/gurl.h"
  22. #include "pagespeed/core/formatter.h"
  23. #include "pagespeed/core/pagespeed_input.h"
  24. #include "pagespeed/core/resource.h"
  25. #include "pagespeed/core/resource_cache_computer.h"
  26. #include "pagespeed/core/resource_util.h"
  27. #include "pagespeed/core/result_provider.h"
  28. #include "pagespeed/core/rule_input.h"
  29. #include "pagespeed/core/string_util.h"
  30. #include "pagespeed/core/uri_util.h"
  31. #include "pagespeed/l10n/l10n.h"
  32. #include "pagespeed/proto/pagespeed_output.pb.h"
  33. namespace {
  34. const char* kRuleName = "AvoidLandingPageRedirects";
  35. const char* kLoginSubstring = "login";
  36. const int64 kMillisInADay = 1000 * 60 * 60 * 24;
  37. const int64 kMillisInAWeek = kMillisInADay * 7;
  38. const pagespeed::RedirectionDetails* GetDetails(
  39. const pagespeed::Result& result) {
  40. const pagespeed::ResultDetails& details = result.details();
  41. if (!details.HasExtension(
  42. pagespeed::RedirectionDetails::message_set_extension)) {
  43. LOG(DFATAL) << "RedirectionDetails missing.";
  44. return NULL;
  45. }
  46. return &details.GetExtension(
  47. pagespeed::RedirectionDetails::message_set_extension);
  48. }
  49. bool SortRuleResultsByRedirection(const pagespeed::Result* lhs,
  50. const pagespeed::Result* rhs) {
  51. const pagespeed::RedirectionDetails* lhs_details = GetDetails(*lhs);
  52. const pagespeed::RedirectionDetails* rhs_details = GetDetails(*rhs);
  53. return lhs_details->chain_index() < rhs_details->chain_index();
  54. }
  55. // Return the host:port pair from a GURL as a string. If the port is not
  56. // explicitly given, infer it from the URL scheme.
  57. std::string GetHostAndPort(const GURL& gurl) {
  58. std::string host = gurl.host();
  59. std::string port = gurl.port();
  60. if (port.empty()) {
  61. port = (gurl.scheme() == "https" ? "443" : "80");
  62. }
  63. return host + ":" + port;
  64. }
  65. } // namespace
  66. namespace pagespeed {
  67. namespace rules {
  68. AvoidLandingPageRedirects::AvoidLandingPageRedirects()
  69. : Rule(InputCapabilities()) {
  70. }
  71. const char* AvoidLandingPageRedirects::name() const {
  72. return kRuleName;
  73. }
  74. UserFacingString AvoidLandingPageRedirects::header() const {
  75. // TRANSLATOR: The name of a Page Speed rule that tells users to avoid
  76. // redirects at the landing page. The landing page is the root
  77. // HTML document that was requested the user in the browser's address bar.
  78. // This is displayed in a list of rule names that Page Speed generates.
  79. return _("Avoid landing page redirects");
  80. }
  81. bool AvoidLandingPageRedirects::AppendResults(
  82. const RuleInput& rule_input, ResultProvider* provider) {
  83. const PagespeedInput& input = rule_input.pagespeed_input();
  84. const Resource* primary_resource =
  85. input.GetResourceCollection().GetPrimaryResourceOrNull();
  86. if (primary_resource == NULL) {
  87. LOG(ERROR) << "Cannot find primary resource.";
  88. return false;
  89. }
  90. const RedirectRegistry::RedirectChain* chain =
  91. input.GetResourceCollection()
  92. .GetRedirectRegistry()->GetRedirectChainOrNull(primary_resource);
  93. // When there is one redirect, the chain size is 2, as it includes both the
  94. // initial url and the final url.
  95. if (chain == NULL || chain->empty() ||
  96. (!input.GetInitialResourceIsCanonical() && chain->size() <= 2)) {
  97. return true;
  98. }
  99. if (resource_util::IsErrorResourceStatusCode(
  100. chain->back()->GetResponseStatusCode())) {
  101. // If the user was redirected to an error page, it should not be a
  102. // bad redirect.
  103. return true;
  104. }
  105. // Keep track of which hostnames we've had to do DNS lookups for so far
  106. // (starting with the original request URL for the page).
  107. std::set<std::string> hosts_used;
  108. const GURL request_gurl(chain->front()->GetRequestUrl());
  109. if (!request_gurl.HostIsIPAddress()) {
  110. hosts_used.insert(request_gurl.host());
  111. }
  112. // Keep track of which host:port combinations we've had to open a TCP
  113. // connection to (starting with the original request URL for the page).
  114. std::set<std::string> tcp_connections_used;
  115. tcp_connections_used.insert(GetHostAndPort(request_gurl));
  116. // All redirections should be avoided for landing page. We flag both temporary
  117. // and permanent redirections.
  118. for (int idx = 0, size = chain->size(); idx < size; ++idx) {
  119. const Resource* resource = chain->at(idx);
  120. if (resource->GetResourceType() != REDIRECT) {
  121. // The last resource in each chain is the final resource, which
  122. // should not be considered here.
  123. continue;
  124. }
  125. // We want to record the redirect and its destination so we can present that
  126. // information in the UI.
  127. if (idx == size - 1) {
  128. continue; // This is the last redirection.
  129. }
  130. const std::string& url = resource->GetRequestUrl();
  131. GURL gurl(url);
  132. const std::string& next_url = chain->at(idx+1)->GetRequestUrl();
  133. GURL next_gurl(next_url);
  134. // We'll have to do a new DNS lookup for the destination of this redirect
  135. // if next_url is not an IP address and hasn't already been looked up.
  136. const std::string next_host = next_gurl.host();
  137. const bool needed_extra_dns =
  138. (!next_gurl.HostIsIPAddress() && hosts_used.count(next_host) == 0);
  139. if (needed_extra_dns) {
  140. hosts_used.insert(next_host);
  141. }
  142. // We'll have to open a new TCP connection for the destination of this
  143. // redirect if we don't already have one open. In addition, we may have to
  144. // do an SSL/TLS handshake first.
  145. const std::string next_host_port = GetHostAndPort(next_gurl);
  146. const bool needed_extra_tcp_handshake =
  147. (tcp_connections_used.count(next_host_port) == 0);
  148. if (needed_extra_tcp_handshake) {
  149. tcp_connections_used.insert(next_host_port);
  150. }
  151. const bool needed_extra_ssl_handshake =
  152. (needed_extra_tcp_handshake && next_gurl.SchemeIsSecure());
  153. Result* result = provider->NewResult();
  154. result->add_resource_urls(url);
  155. result->add_resource_urls(next_url);
  156. // We don't penalize people for the first url when we allow a single redirect.
  157. if (idx != 0 || input.GetInitialResourceIsCanonical()) {
  158. Savings* savings = result->mutable_savings();
  159. savings->set_requests_saved(1);
  160. // TODO(mdsteele): If needed_extra_dns is true, maybe we should also do
  161. // savings->set_dns_requests_saved(1), but only if we won't be using that
  162. // host anyway for other resources.
  163. // TODO(mdsteele): If needed_extra_tcp_handshake is true, maybe we should
  164. // also do savings->set_connections_saved(1), but only if we won't be
  165. // using that connection anyway for other resources.
  166. savings->set_render_blocking_round_trips_saved(
  167. 1 + (needed_extra_dns ? 1 : 0) + (needed_extra_tcp_handshake ? 1 : 0) +
  168. (needed_extra_ssl_handshake ? 1 : 0));
  169. }
  170. ResultDetails* details = result->mutable_details();
  171. RedirectionDetails* redirection_details =
  172. details->MutableExtension(
  173. RedirectionDetails::message_set_extension);
  174. bool permanent_redirection = (resource->GetResponseStatusCode() == 301);
  175. bool cacheable = permanent_redirection;
  176. int64 freshness_lifetime_millis = 0;
  177. ResourceCacheComputer comp(resource);
  178. if (comp.GetFreshnessLifetimeMillis(&freshness_lifetime_millis)) {
  179. cacheable = freshness_lifetime_millis >= kMillisInAWeek;
  180. redirection_details->set_freshness_lifetime_millis(
  181. freshness_lifetime_millis);
  182. LOG(INFO) << "freshness_lifetime_millis: " << freshness_lifetime_millis;
  183. // An explicit cache freshness life time is specified, the redirection is
  184. // not permanent by any way.
  185. redirection_details->set_is_permanent(false);
  186. } else {
  187. redirection_details->set_is_permanent(permanent_redirection);
  188. }
  189. redirection_details->set_is_cacheable(cacheable);
  190. bool same_host = (gurl.host() == next_gurl.host());
  191. redirection_details->set_is_same_host(same_host);
  192. const std::string login(kLoginSubstring);
  193. std::string::const_iterator login_it =
  194. std::search(next_url.begin(), next_url.end(),
  195. login.begin(), login.end(),
  196. string_util::CaseInsensitiveCompareASCII());
  197. bool is_login = (login_it != next_url.end());
  198. redirection_details->set_is_likely_login(is_login);
  199. bool is_callback = (next_gurl.query().find(url) != std::string::npos);
  200. redirection_details->set_is_likely_callback(is_callback);
  201. redirection_details->set_chain_index(idx);
  202. redirection_details->set_chain_length(size);
  203. }
  204. return true;
  205. }
  206. void AvoidLandingPageRedirects::FormatResults(
  207. const ResultVector& results, RuleFormatter* formatter) {
  208. if (results.empty()) {
  209. // TRANSLATOR: A summary to give a general overview of this Page
  210. // Speed rule.
  211. formatter->SetSummaryLine(
  212. _("Your page has no redirects. Learn more about %(BEGIN_LINK)savoiding "
  213. "landing page redirects%(END_LINK)s."),
  214. HyperlinkArgument(
  215. "LINK",
  216. "https://developers.google.com/speed/docs/insights/"
  217. "AvoidRedirects"));
  218. return;
  219. }
  220. // TRANSLATOR: A summary to give a general overview of this Page
  221. // Speed rule.
  222. formatter->SetSummaryLine(
  223. _("Your page has %(NUM_REDIRECTS)s redirects. "
  224. "Redirects introduce additional delays before "
  225. "the page can be loaded."),
  226. IntArgument("NUM_REDIRECTS", results.size()));
  227. UrlBlockFormatter* body = formatter->AddUrlBlock(
  228. // TRANSLATOR: Header at the top of a list of URLs that Page
  229. // Speed detected as a chain of HTTP redirections. It tells the
  230. // user to fix the problem by removing the URLs that redirect to
  231. // others. The text between BEGIN_LINK and END_LINK will be
  232. // displayed as a clickable link in the browser, which takes the
  233. // user to a document providing additional information.
  234. _("%(BEGIN_LINK)sAvoid landing page redirects%(END_LINK)s for the "
  235. "following chain of redirected URLs."),
  236. HyperlinkArgument(
  237. "LINK",
  238. "https://developers.google.com/speed/docs/insights/AvoidRedirects"));
  239. // Add the very first url onto the front of the list. Then loop over
  240. // everything, adding on the second url from each result.
  241. body->AddUrlResult(
  242. not_localized("%(FIRST_URL)s"),
  243. UrlArgument("FIRST_URL", (*results.begin())->resource_urls(0)));
  244. for (ResultVector::const_iterator iter = results.begin(),
  245. end = results.end();
  246. iter != end;
  247. ++iter) {
  248. const Result& result = **iter;
  249. if (result.resource_urls_size() != 2) {
  250. LOG(DFATAL) << "Unexpected number of resource URLs. Expected 2, Got "
  251. << result.resource_urls_size() << ".";
  252. continue;
  253. }
  254. body->AddUrlResult(
  255. not_localized("%(REDIRECTED_URL)s"),
  256. UrlArgument("REDIRECTED_URL", result.resource_urls(1)));
  257. }
  258. }
  259. void AvoidLandingPageRedirects::SortResultsInPresentationOrder(
  260. ResultVector* rule_results) const {
  261. // Sort the results in request order so that the user can easily see the
  262. // redirection chain.
  263. std::stable_sort(rule_results->begin(),
  264. rule_results->end(),
  265. SortRuleResultsByRedirection);
  266. }
  267. } // namespace rules
  268. } // namespace pagespeed