PageRenderTime 70ms CodeModel.GetById 32ms RepoModel.GetById 0ms app.codeStats 0ms

/tests/mfabrik-blog.html

https://github.com/aaitxcoder/mobilize.js
HTML | 1094 lines | 969 code | 118 blank | 7 comment | 0 complexity | 7d56177c5fbd7d2d3774e84e7458238e MD5 | raw file
  1. <!DOCTYPE html>
  2. <!-- Test mobilize.js against another blog with lots of content -->
  3. <html dir="ltr" lang="en-US">
  4. <head>
  5. <meta charset="UTF-8" />
  6. <title>mFabrik - mobile sites, apps, HTML5 and CMS software development</title>
  7. <link rel="profile" href="http://gmpg.org/xfn/11" />
  8. <link rel="stylesheet" type="text/css" media="all" href="http://blog.mfabrik.com/wp-content/themes/twentyten/style.css" />
  9. <link rel="pingback" href="http://blog.mfabrik.com/xmlrpc.php" />
  10. <link rel="alternate" type="application/rss+xml" title="mFabrik - mobile sites, apps, HTML5 and CMS software development &raquo; Feed" href="http://blog.mfabrik.com/feed/" />
  11. <link rel="alternate" type="application/rss+xml" title="mFabrik - mobile sites, apps, HTML5 and CMS software development &raquo; Comments Feed" href="http://blog.mfabrik.com/comments/feed/" />
  12. <link rel='stylesheet' id='shashin_css-css' href='http://blog.mfabrik.com/wp-content/plugins/shashin/display/shashin.css?ver=2.6.3' type='text/css' media='all' />
  13. <link rel='stylesheet' id='highslide_css-css' href='http://blog.mfabrik.com/wp-content/plugins/shashin/display/highslide.css?ver=4.1.4' type='text/css' media='all' />
  14. <script type='text/javascript' src='http://blog.mfabrik.com/wp-includes/js/l10n.js?ver=20101110'></script>
  15. <script type='text/javascript'>
  16. /* <![CDATA[ */
  17. var highslide_settings = { // Highslide options emitted for the shashin plugin; every value is a string
  18. graphics_dir: "http://blog.mfabrik.com/wp-content/plugins/shashin/display/highslide/graphics/", // URL of the plugin's Highslide graphics directory
  19. outline_type: "rounded-white",
  20. dimming_opacity: "0.75",
  21. interval: "5000", // presumably milliseconds — confirm against highslide_settings.js
  22. repeat: "1", // "1"/"0" look like boolean flags passed as strings — confirm
  23. position: "top center",
  24. hide_controller: "0"
  25. };
  26. /* ]]> */
  27. </script>
  28. <script type='text/javascript' src='http://blog.mfabrik.com/wp-content/plugins/shashin/display/highslide_settings.js?ver=2.6.3'></script>
  29. <link rel="EditURI" type="application/rsd+xml" title="RSD" href="http://blog.mfabrik.com/xmlrpc.php?rsd" />
  30. <link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://blog.mfabrik.com/wp-includes/wlwmanifest.xml" />
  31. <link rel='index' title='mFabrik &#8211; mobile sites, apps, HTML5 and CMS software development' href='http://blog.mfabrik.com/' />
  32. <meta name="generator" content="WordPress 3.1" />
  33. <!-- All in One SEO Pack 1.6.13.1 by Michael Torbert of Semper Fi Web Design[78,176] -->
  34. <meta name="keywords" content="all files,cyberduck,doc,download,drag and drop,export,import,mass upload,osx,pdf,photos,plone,webdav,answer,discussion,forum,irc,mailing list,python,question,smart questions,apache,error_log,php,php.ini,tail,xampp,chrome,commit hook,compress,console,firefox,git,github,ie,internet explorer,javascript,jquery,preprocessor,safari,webkit,app engine,data,mirror,remote api,sqlite,upload,always on,appengine,cloud,cloud computing,cron,deadlineexceedederror,dynamic,gaeutilities,google,http requiest,instance,monitoring,new process,response time,zabbix,ironpython,jython,microsoft,net,pypy,python tools for visual studio,adapter,interface,page template,portlet,render,zope,ajax,comments,disqus,lazy load,static,varnish,crawl,find,follow,full text,full text search,pypdf,scrape,scrapy,search,trademark,user policy,violation,web crawler" />
  35. <link rel="canonical" href="http://blog.mfabrik.com/" />
  36. <!-- /all in one seo pack -->
  37. <link type="text/css" rel="stylesheet" href="http://blog.mfabrik.com/wp-content/plugins/syntax/Styles/SyntaxHighlighter.css" />
  38. </head>
  39. <body class="home blog logged-in">
  40. <body>
  41. <script type="text/javascript">
  42. // Don't start executing mobilize whilst loaading JS file, but wait
  43. // for our manual (development commands)
  44. window.mobilizeAutoload = false;
  45. function mobilizeCustomInit() {
  46. // Include mFabrik specific Javascript initialization layer
  47. mobilize.cdnOptions.javascriptBundles.push("http://localhost:8080/js/mobilize.mfabrik-blog.js");
  48. }
  49. </script>
  50. <script class="mobilize-js-source"
  51. type="text/javascript"
  52. src="http://localhost:8080/js/mobilize.js"
  53. >
  54. </script>
  55. <script type="text/javascript"
  56. src="http://localhost:8080/js/mobilize.wordpress.js"
  57. >
  58. </script>
  59. <script type="text/javascript">
  60. // Setup mobilize.js to load our additional mobilize.mfabrik-blog.js layer
  61. function setupCustomBlog(){
  62. mobilize.init({
  63. }, {
  64. // Don't do cloud error reporting
  65. // (it would useful for production deployment only)
  66. errorReportingURL: false,
  67. baseURL: "http://localhost:8080", // Test server
  68. // Load JS files locally, unbundled
  69. javascriptBundles : ["js/jquery.js",
  70. "js/mobilize.onjq.js",
  71. "js/jquery.mobile.js",
  72. ],
  73. // Load CSS files locally, unbundled
  74. cssBundles: ["css/jquery.mobile.css",
  75. "css/wordpress.css"],
  76. });
  77. // Since we are not in auto-run mode,
  78. // we start doing the stuff after we have set-up
  79. // our options for development correctly
  80. mobilize.bootstrap();
  81. }
  82. setupCustomBlog();
  83. </script>
  84. <div id="wrapper" class="hfeed">
  85. <div id="header">
  86. <div id="masthead">
  87. <div id="branding" role="banner">
  88. <h1 id="site-title">
  89. <span>
  90. <a href="http://blog.mfabrik.com/" title="mFabrik &#8211; mobile sites, apps, HTML5 and CMS software development" rel="home">mFabrik &#8211; mobile sites, apps, HTML5 and CMS software development</a>
  91. </span>
  92. </h1>
  93. <div id="site-description">Freedom delivered.</div>
  94. <img src="http://blog.mfabrik.com/wp-content/themes/twentyten/images/headers/path.jpg" width="940" height="198" alt="" />
  95. </div><!-- #branding -->
  96. <div id="access" role="navigation">
  97. <div class="skip-link screen-reader-text"><a href="#content" title="Skip to content">Skip to content</a></div>
  98. <div class="menu"><ul><li class="current_page_item"><a href="http://blog.mfabrik.com/" title="Home">Home</a></li><li class="page_item page-item-2"><a href="http://blog.mfabrik.com/about/" title="About this blog">About this blog</a></li><li class="page_item page-item-336"><a href="http://blog.mfabrik.com/phonegap-on-maemo/" title="PhoneGap on Maemo">PhoneGap on Maemo</a></li></ul></div>
  99. </div><!-- #access -->
  100. </div><!-- #masthead -->
  101. </div><!-- #header -->
  102. <div id="main">
  103. <div id="container">
  104. <div id="content" role="main">
  105. <div id="nav-above" class="navigation">
  106. <div class="nav-previous"><a href="http://blog.mfabrik.com/page/2/" ><span class="meta-nav">&larr;</span> Older posts</a></div>
  107. <div class="nav-next"></div>
  108. </div><!-- #nav-above -->
  109. <div id="post-1133" class="post-1133 post type-post status-publish format-standard hentry category-plone category-technology tag-all-files tag-cyberduck tag-doc tag-download tag-drag-and-drop tag-export tag-import tag-mass-upload tag-osx tag-pdf tag-photos tag-plone tag-webdav">
  110. <h2 class="entry-title"><a href="http://blog.mfabrik.com/2011/03/18/making-an-offline-file-system-copy-of-a-plone-site-using-webdav/" title="Permalink to Making an offline file system copy of a Plone site using WebDAV" rel="bookmark">Making an offline file system copy of a Plone site using WebDAV</a></h2>
  111. <div class="entry-meta">
  112. <span class="meta-prep meta-prep-author">Posted on</span> <a href="http://blog.mfabrik.com/2011/03/18/making-an-offline-file-system-copy-of-a-plone-site-using-webdav/" title="12:06 pm" rel="bookmark"><span class="entry-date">March 18, 2011</span></a> <span class="meta-sep">by</span> <span class="author vcard"><a class="url fn n" href="http://blog.mfabrik.com/author/moo/" title="View all posts by Mikko Ohtamaa">Mikko Ohtamaa</a></span> </div><!-- .entry-meta -->
  113. <div class="entry-content">
  114. <p>You might want to create an offline copy of a Plone site because</p>
  115. <ul>
  116. <li>You are traveling and you want to have all files on the site (PDFs for reading)</li>
  117. <li>You are taking a site down and making the final back-up</li>
  118. <li>You just want to feel how cool Plone is</li>
  119. </ul>
  120. <p><a href="http://plone.org/documentation/kb/webdav">Plone supports WebDAV</a>.</p>
  121. <p>Creating a file system viewable offline copy of a Plone site is a task of</p>
  122. <ul>
  123. <li><a href="http://plone.org/documentation/kb/webdav">Enabling WebDAV</a></li>
  124. <li>Log in to the site via WebDAV. On OSX use <a href="http://cyberduck.ch/">Cyberduck</a>; Finder (the file browser of OSX itself) may have issues, though it works. You might need Zope admin privileges for certain operations.</li>
  125. <li>Drag and drop Plone site to your hard disk</li>
  126. </ul>
  127. <p>WebDAV copy process works smoothly</p>
  128. <ul>
  129. <li>Folder and page structure is intact</li>
  130. <li>Files are copied as is (think PDFs, Docs)</li>
  131. <li>Images are copied as is</li>
  132. <li>Pages (HTML) are converted to special files, which are still readable in plain-text editor</li>
  133. </ul>
  134. <p>Note that some special folders (acl_users, reference_catalog, etc.) might be exposed through WebDAV, but they are not really copyable. Just ignore these during the copy process.</p>
  135. <p>You can also use WebDAV to mass upload files and images for your image bank instead of manually uploading them through web interface.
  136. <p class="signature">
  137. <a href="http://mfabrik.com/@@zoho-contact-form"><img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/mfabrik-24.png"></img></a> <a href="http://mfabrik.com/@@zoho-contact-form"">Get developers</a> <a href="http://twitter.com/mfabrik"> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml"><img valign="middle" src="http://www.feedburner.com/fb/images/pub/feed-icon16x16.png" alt="" style="vertical-align:middle;border:0"/></a> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml">Subscribe mFabrik blog in a reader</a> <img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/twitter-24.png"></img></a> <a href="http://twitter.com/moo9000">Follow me on Twitter</a></p>
  138. </div><!-- .entry-content -->
  139. <div class="entry-utility">
  140. <span class="cat-links">
  141. <span class="entry-utility-prep entry-utility-prep-cat-links">Posted in</span> <a href="http://blog.mfabrik.com/category/plone/" title="View all posts in plone" rel="category tag">plone</a>, <a href="http://blog.mfabrik.com/category/technology/" title="View all posts in technology" rel="category tag">technology</a> </span>
  142. <span class="meta-sep">|</span>
  143. <span class="tag-links">
  144. <span class="entry-utility-prep entry-utility-prep-tag-links">Tagged</span> <a href="http://blog.mfabrik.com/tag/all-files/" rel="tag">all files</a>, <a href="http://blog.mfabrik.com/tag/cyberduck/" rel="tag">cyberduck</a>, <a href="http://blog.mfabrik.com/tag/doc/" rel="tag">doc</a>, <a href="http://blog.mfabrik.com/tag/download/" rel="tag">download</a>, <a href="http://blog.mfabrik.com/tag/drag-and-drop/" rel="tag">drag and drop</a>, <a href="http://blog.mfabrik.com/tag/export/" rel="tag">export</a>, <a href="http://blog.mfabrik.com/tag/import/" rel="tag">import</a>, <a href="http://blog.mfabrik.com/tag/mass-upload/" rel="tag">mass upload</a>, <a href="http://blog.mfabrik.com/tag/osx/" rel="tag">osx</a>, <a href="http://blog.mfabrik.com/tag/pdf/" rel="tag">pdf</a>, <a href="http://blog.mfabrik.com/tag/photos/" rel="tag">photos</a>, <a href="http://blog.mfabrik.com/tag/plone/" rel="tag">plone</a>, <a href="http://blog.mfabrik.com/tag/webdav/" rel="tag">webdav</a> </span>
  145. <span class="meta-sep">|</span>
  146. <span class="comments-link"><a href="http://blog.mfabrik.com/2011/03/18/making-an-offline-file-system-copy-of-a-plone-site-using-webdav/#respond" title="Comment on Making an offline file system copy of a Plone site using WebDAV">Leave a comment</a></span>
  147. <span class="meta-sep">|</span> <span class="edit-link"><a class="post-edit-link" href="http://blog.mfabrik.com/wp-admin/post.php?post=1133&amp;action=edit" title="Edit Post">Edit</a></span> </div><!-- .entry-utility -->
  148. </div><!-- #post-## -->
  149. <div id="post-1128" class="post-1128 post type-post status-publish format-standard hentry category-plone category-python category-technology tag-answer tag-discussion tag-forum tag-irc tag-mailing-list tag-plone tag-python tag-question tag-smart-questions">
  150. <h2 class="entry-title"><a href="http://blog.mfabrik.com/2011/03/17/rfc-simple-internet-question-asking-protocol-for-human-beings/" title="Permalink to RFC: Simple Internet Question Asking Protocol (for human beings)" rel="bookmark">RFC: Simple Internet Question Asking Protocol (for human beings)</a></h2>
  151. <div class="entry-meta">
  152. <span class="meta-prep meta-prep-author">Posted on</span> <a href="http://blog.mfabrik.com/2011/03/17/rfc-simple-internet-question-asking-protocol-for-human-beings/" title="8:32 pm" rel="bookmark"><span class="entry-date">March 17, 2011</span></a> <span class="meta-sep">by</span> <span class="author vcard"><a class="url fn n" href="http://blog.mfabrik.com/author/moo/" title="View all posts by Mikko Ohtamaa">Mikko Ohtamaa</a></span> </div><!-- .entry-meta -->
  153. <div class="entry-content">
  154. <p>This is my  attempt version 0.1 to teach the world how one should ask questions in the simplest possible way in Internet discussion. To make it simple, I try to keep this short. This post sprouts from my frustration from the lack of people&#8217;s ability to form questions one could easily answer.</p>
  155. <h2>Assumptions</h2>
  156. <p>If you want to ask a question in forum, IRC (chat) or mailing list</p>
  157. <ol>
  158. <li>Assume people are busy</li>
  159. <li>Assume that people want to help you, even though they are busy, since they volunteer to participate in the community discussion and thus they must care about the community</li>
  160. </ol>
  161. <p>To make it win-win situation, you as the question maker, are responsible of making the process of asking the question and answering the question as easy as possible. <strong>Form your question in such a way that it is as easy as possible for the readers to place themselves into your situation and think how they would themselves solve the situation </strong>(Mikko&#8217;s rule of empathy)<strong>.</strong></p>
  162. <p>The less time it takes to understand your situation, the more likely people are to contribute their time.</p>
  163. <h2>Question process</h2>
  164. <p>Thus, I propose that you always follow the simple three steps when asking a question</p>
  165. <ol>
  166. <li>Before asking the question tell what you <strong>already know</strong></li>
  167. <li>Describe<strong> the problem</strong></li>
  168. <li>Ask what you <strong>do not know</strong> yet</li>
  169. </ol>
  170. <p>Then <strong>wait</strong> patiently for the answer (the busy part).</p>
  171. <h2>Pitfalls</h2>
  172. <p>These issues often stem from the fact that the person asking the question is not familiar with text-based communication where people&#8217;s time (bandwidth) is limited and the lack of body gestures often leads to misinterpretations.</p>
  173. <ol>
  174. <li>Do not ask yes / no questions. You are skipping steps #1 and #3.</li>
  175. <li>Do not saturate the bandwidth: do not repeat yourself or otherwise flood the medium. If people are busy, repeating yourself does not make them un-busy. You are breaking the assumption #1.</li>
  176. <li>Do not try to pull excessive attention on you &#8211; do not try to highlight your question like &#8220;PLEASE HELP !!!!&#8221; Even if it is a matter of life and death for you it is not for the other people who are dealing with their own matters of life and death. You are breaking the assumption #2.</li>
  177. </ol>
  178. <h2>Example</h2>
  179. <p><span style="text-decoration: line-through;">Q: Is it possible to fly me to the Moon? A: Yes</span></p>
  180. <p>Q: I am an evil super-villain whose plan to overtake the world failed.  Now I must escape. I am looking for methods to take me to the Moon or the orbit where national laws do not apply. I am not sure whether I should use a shuttle or a rocket. Where could I obtain such a vehicle?</p>
  181. <p>A: US of A just retired one reliable space shuttle that you could use. But if I were you I&#8217;d consider underwater base instead, as they will become cheaper in the long run, since you can more easily produce breathable oxygen.</p>
  182. <h2>More info</h2>
  183. <ul>
  184. <li><a href="http://doctormo.deviantart.com/#/d2unj89">How to ask smart questions</a></li>
  185. <li><a href="http://plone.org/documentation/kb/ask-for-help">Plone community guidelines asking for help</a></li>
  186. </ul>
  187. <p class="signature">
  188. <a href="http://mfabrik.com/@@zoho-contact-form"><img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/mfabrik-24.png"></img></a> <a href="http://mfabrik.com/@@zoho-contact-form"">Get developers</a> <a href="http://twitter.com/mfabrik"> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml"><img valign="middle" src="http://www.feedburner.com/fb/images/pub/feed-icon16x16.png" alt="" style="vertical-align:middle;border:0"/></a> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml">Subscribe mFabrik blog in a reader</a> <img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/twitter-24.png"></img></a> <a href="http://twitter.com/moo9000">Follow me on Twitter</a></p>
  189. </div><!-- .entry-content -->
  190. <div class="entry-utility">
  191. <span class="cat-links">
  192. <span class="entry-utility-prep entry-utility-prep-cat-links">Posted in</span> <a href="http://blog.mfabrik.com/category/plone/" title="View all posts in plone" rel="category tag">plone</a>, <a href="http://blog.mfabrik.com/category/python/" title="View all posts in python" rel="category tag">python</a>, <a href="http://blog.mfabrik.com/category/technology/" title="View all posts in technology" rel="category tag">technology</a> </span>
  193. <span class="meta-sep">|</span>
  194. <span class="tag-links">
  195. <span class="entry-utility-prep entry-utility-prep-tag-links">Tagged</span> <a href="http://blog.mfabrik.com/tag/answer/" rel="tag">answer</a>, <a href="http://blog.mfabrik.com/tag/discussion/" rel="tag">discussion</a>, <a href="http://blog.mfabrik.com/tag/forum/" rel="tag">forum</a>, <a href="http://blog.mfabrik.com/tag/irc/" rel="tag">irc</a>, <a href="http://blog.mfabrik.com/tag/mailing-list/" rel="tag">mailing list</a>, <a href="http://blog.mfabrik.com/tag/plone/" rel="tag">plone</a>, <a href="http://blog.mfabrik.com/tag/python/" rel="tag">python</a>, <a href="http://blog.mfabrik.com/tag/question/" rel="tag">question</a>, <a href="http://blog.mfabrik.com/tag/smart-questions/" rel="tag">smart questions</a> </span>
  196. <span class="meta-sep">|</span>
  197. <span class="comments-link"><a href="http://blog.mfabrik.com/2011/03/17/rfc-simple-internet-question-asking-protocol-for-human-beings/#respond" title="Comment on RFC: Simple Internet Question Asking Protocol (for human beings)">Leave a comment</a></span>
  198. <span class="meta-sep">|</span> <span class="edit-link"><a class="post-edit-link" href="http://blog.mfabrik.com/wp-admin/post.php?post=1128&amp;action=edit" title="Edit Post">Edit</a></span> </div><!-- .entry-utility -->
  199. </div><!-- #post-## -->
  200. <div id="post-1124" class="post-1124 post type-post status-publish format-standard hentry category-php category-technology tag-apache tag-errorlog tag-osx tag-php tag-php-ini tag-tail tag-xampp">
  201. <h2 class="entry-title"><a href="http://blog.mfabrik.com/2011/03/16/enable-php-log-output-error_log-on-xampp-on-osx/" title="Permalink to Enable PHP log output (error_log) on XAMPP on OSX" rel="bookmark">Enable PHP log output (error_log) on XAMPP on OSX</a></h2>
  202. <div class="entry-meta">
  203. <span class="meta-prep meta-prep-author">Posted on</span> <a href="http://blog.mfabrik.com/2011/03/16/enable-php-log-output-error_log-on-xampp-on-osx/" title="12:29 am" rel="bookmark"><span class="entry-date">March 16, 2011</span></a> <span class="meta-sep">by</span> <span class="author vcard"><a class="url fn n" href="http://blog.mfabrik.com/author/moo/" title="View all posts by Mikko Ohtamaa">Mikko Ohtamaa</a></span> </div><!-- .entry-meta -->
  204. <div class="entry-content">
  205. <p>If you are using <a href="http://www.apachefriends.org/en/xampp.html">XAMPP</a> to develop PHP software (WordPress, Joomla!) on OSX you might want to get some advanced logging output from your code. PHP provides nice <a href="http://php.net/manual/en/function.error-log.php">error_log</a>() function, but it is silent by default. Here are short instructions how to enable it and follow the log.</p>
  206. <p>Use your favorite editor to edit php.ini file in <em>/Applications/XAMPP/etc/php.ini</em> &#8211; sudo privileges needed, <a href="http://www.peterborgapps.com/smultron/">Smultron</a> does it out of the box.</p>
  207. <p>Change lines:</p>
  208. <pre>log_errors = Off</pre>
  209. <pre>;error_log = filename</pre>
  210. <p>To:</p>
  211. <pre>log_errors = on</pre>
  212. <pre>error_log = /tmp/php.log</pre>
  213. <p>Restart Apache using <em>XAMPP controller</em> in <em>Finder -&gt; Applications</em>.</p>
  214. <p>Now use the following UNIX command to see continuous log flow in your terminal:</p>
  215. <pre>tail -f /tmp/php.log
  216. </pre>
  217. <p>See also the earlier article about <a href="http://blog.mfabrik.com/2010/12/22/local-xampp-development-and-unix-file-permissions/">XAMPP and file permissions</a>.
  218. <p class="signature">
  219. <a href="http://mfabrik.com/@@zoho-contact-form"><img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/mfabrik-24.png"></img></a> <a href="http://mfabrik.com/@@zoho-contact-form"">Get developers</a> <a href="http://twitter.com/mfabrik"> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml"><img valign="middle" src="http://www.feedburner.com/fb/images/pub/feed-icon16x16.png" alt="" style="vertical-align:middle;border:0"/></a> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml">Subscribe mFabrik blog in a reader</a> <img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/twitter-24.png"></img></a> <a href="http://twitter.com/moo9000">Follow me on Twitter</a></p>
  220. </div><!-- .entry-content -->
  221. <div class="entry-utility">
  222. <span class="cat-links">
  223. <span class="entry-utility-prep entry-utility-prep-cat-links">Posted in</span> <a href="http://blog.mfabrik.com/category/php/" title="View all posts in php" rel="category tag">php</a>, <a href="http://blog.mfabrik.com/category/technology/" title="View all posts in technology" rel="category tag">technology</a> </span>
  224. <span class="meta-sep">|</span>
  225. <span class="tag-links">
  226. <span class="entry-utility-prep entry-utility-prep-tag-links">Tagged</span> <a href="http://blog.mfabrik.com/tag/apache/" rel="tag">apache</a>, <a href="http://blog.mfabrik.com/tag/error_log/" rel="tag">error_log</a>, <a href="http://blog.mfabrik.com/tag/osx/" rel="tag">osx</a>, <a href="http://blog.mfabrik.com/tag/php/" rel="tag">php</a>, <a href="http://blog.mfabrik.com/tag/php-ini/" rel="tag">php.ini</a>, <a href="http://blog.mfabrik.com/tag/tail/" rel="tag">tail</a>, <a href="http://blog.mfabrik.com/tag/xampp/" rel="tag">xampp</a> </span>
  227. <span class="meta-sep">|</span>
  228. <span class="comments-link"><a href="http://blog.mfabrik.com/2011/03/16/enable-php-log-output-error_log-on-xampp-on-osx/#respond" title="Comment on Enable PHP log output (error_log) on XAMPP on OSX">Leave a comment</a></span>
  229. <span class="meta-sep">|</span> <span class="edit-link"><a class="post-edit-link" href="http://blog.mfabrik.com/wp-admin/post.php?post=1124&amp;action=edit" title="Edit Post">Edit</a></span> </div><!-- .entry-utility -->
  230. </div><!-- #post-## -->
  231. <div id="post-1119" class="post-1119 post type-post status-publish format-standard hentry category-javascript category-technology tag-chrome tag-commit-hook tag-compress tag-console tag-firefox tag-git tag-github tag-ie tag-internet-explorer tag-javascript tag-jquery tag-plone tag-preprocessor tag-python tag-safari tag-webkit">
  232. <h2 class="entry-title"><a href="http://blog.mfabrik.com/2011/03/15/everyone-loves-and-hates-console-log/" title="Permalink to Everyone loves and hates console.log()" rel="bookmark">Everyone loves and hates console.log()</a></h2>
  233. <div class="entry-meta">
  234. <span class="meta-prep meta-prep-author">Posted on</span> <a href="http://blog.mfabrik.com/2011/03/15/everyone-loves-and-hates-console-log/" title="4:35 pm" rel="bookmark"><span class="entry-date">March 15, 2011</span></a> <span class="meta-sep">by</span> <span class="author vcard"><a class="url fn n" href="http://blog.mfabrik.com/author/moo/" title="View all posts by Mikko Ohtamaa">Mikko Ohtamaa</a></span> </div><!-- .entry-meta -->
  235. <div class="entry-content">
  236. <p>console.log()  is the best friend of every Javascript junkie. However, the lack of it isn&#8217;t. console.log() function is only available in Webkit based browsers and with Firebug in Firefox. It&#8217;s the infamous situation that someone leaves console.log() to Javascript code, doesn&#8217;t notice its presence, commits the file and suddenly all Javascript on the production server stops working for Internet Explorer users&#8230;.</p>
  237. <p>To tackle the lack of console.log() problem there are several approaches.</p>
  238. <h2>Use dummy placeholder if console is missing</h2>
  239. <p>This snippet wraps console.log (need to repeat for console.error etc.):</p>
  240. <pre>// Ignore console on platforms where it is not available
  241. if (typeof(window["console"]) == "undefined") { console = {}; console.log = function(a) {}; }</pre>
  242. <p>Pros</p>
  243. <ul>
  244. <li>Easy</li>
  245. </ul>
  246. <p>Cons</p>
  247. <ul>
  248. <li>Need to add to every Javascript file</li>
  249. <li>Messes with global namespace</li>
  250. </ul>
  251. <h2>Use module specific log function</h2>
  252. <p>This makes your code little bit ugly, more Java like. Each Javascript module declares their own log() function which checks the existence of console.log() and outputs there if it&#8217;s present.</p>
  253. <pre>mfabrik.log =function(x) {
  254. if(console.log) {
  255. console.log(x);
  256. }
  257. }
  258. mfabrik.log("My log messages")</pre>
  259. <p>Pros</p>
  260. <ul>
  261. <li>Easy to hook other loggers</li>
  262. <li>You can disable all logging output with one if</li>
  263. </ul>
  264. <p>Cons</p>
  265. <ul>
  266. <li>Not as natural to write as console.log()</li>
  267. <li>Need to add to every Javascript module</li>
  268. </ul>
  269. <h2>Preprocess Javascript files</h2>
  270. <p>Plone (Kukit / KSS) uses this approach. All debug Javascript is hidden behind conditional comments and it is filtered out when JS files are bundled for the production deployment. (<a href="http://codespeak.net/svn/kukit/kss.core/trunk/kss/core/pluginregistry/_concatresource/compression/javascript.py">The preprocessing code is here in Python for those who are interested in it</a>).</p>
  271. <pre>if (_USE_BASE2) {
  272. // Base2 legacy version: matchAll has to be used
  273. // Base2 recent version: querySelectorAll has to be used
  274. var _USE_BASE2_LEGACY = (typeof(base2.DOM.Document.querySelectorAll) == 'undefined');
  275. if (! _USE_BASE2_LEGACY) {
  276. ;;;     kukit.log('Using cssQuery from base2.');</pre>
  277. <p>Pros</p>
  278. <ul>
  279. <li>Makes production Javascript files lighter</li>
  280. <li>Make production Javascript files more professional &#8211; you do not deliver logging statements intended for internal purposes to your site visitors</li>
  281. </ul>
  282. <p>Cons</p>
  283. <ul>
  284. <li>Complex &#8211; preprocessing is required</li>
  285. </ul>
  286. <h2>Commit hooks</h2>
  287. <p>You can use Subversion and Git commit hooks to check that committed JS files do not contain console.log. For example, Plone repositories do this for the Python statement  import pdb ; pdb.set_trace() (enforce pdb breakpoint).</p>
  288. <p>Pros</p>
  289. <ul>
  290. <li>Very robust approach &#8211; you cannot create code with console.log()</li>
  291. </ul>
  292. <p>Cons</p>
  293. <ul>
  294. <li>Prevents also legitimate use of console.log()</li>
  295. <li>Github, for example, lacks possibility to push client-side commit hooks to the repository cloners. This means that every developer must manually install commit hooks themselves. Everything manual you need to do makes the process error prone.</li>
  296. </ul>
  297. <h2>Other approaches?</h2>
  298. <p>Please tell us!
  299. <p class="signature">
  300. <a href="http://mfabrik.com/@@zoho-contact-form"><img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/mfabrik-24.png"></img></a> <a href="http://mfabrik.com/@@zoho-contact-form"">Get developers</a> <a href="http://twitter.com/mfabrik"> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml"><img valign="middle" src="http://www.feedburner.com/fb/images/pub/feed-icon16x16.png" alt="" style="vertical-align:middle;border:0"/></a> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml">Subscribe mFabrik blog in a reader</a> <img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/twitter-24.png"></img></a> <a href="http://twitter.com/moo9000">Follow me on Twitter</a></p>
  301. </div><!-- .entry-content -->
  302. <div class="entry-utility">
  303. <span class="cat-links">
  304. <span class="entry-utility-prep entry-utility-prep-cat-links">Posted in</span> <a href="http://blog.mfabrik.com/category/javascript/" title="View all posts in javascript" rel="category tag">javascript</a>, <a href="http://blog.mfabrik.com/category/technology/" title="View all posts in technology" rel="category tag">technology</a> </span>
  305. <span class="meta-sep">|</span>
  306. <span class="tag-links">
  307. <span class="entry-utility-prep entry-utility-prep-tag-links">Tagged</span> <a href="http://blog.mfabrik.com/tag/chrome/" rel="tag">chrome</a>, <a href="http://blog.mfabrik.com/tag/commit-hook/" rel="tag">commit hook</a>, <a href="http://blog.mfabrik.com/tag/compress/" rel="tag">compress</a>, <a href="http://blog.mfabrik.com/tag/console/" rel="tag">console</a>, <a href="http://blog.mfabrik.com/tag/firefox/" rel="tag">firefox</a>, <a href="http://blog.mfabrik.com/tag/git/" rel="tag">git</a>, <a href="http://blog.mfabrik.com/tag/github/" rel="tag">github</a>, <a href="http://blog.mfabrik.com/tag/ie/" rel="tag">ie</a>, <a href="http://blog.mfabrik.com/tag/internet-explorer/" rel="tag">internet explorer</a>, <a href="http://blog.mfabrik.com/tag/javascript/" rel="tag">javascript</a>, <a href="http://blog.mfabrik.com/tag/jquery/" rel="tag">jquery</a>, <a href="http://blog.mfabrik.com/tag/plone/" rel="tag">plone</a>, <a href="http://blog.mfabrik.com/tag/preprocessor/" rel="tag">preprocessor</a>, <a href="http://blog.mfabrik.com/tag/python/" rel="tag">python</a>, <a href="http://blog.mfabrik.com/tag/safari/" rel="tag">safari</a>, <a href="http://blog.mfabrik.com/tag/webkit/" rel="tag">webkit</a> </span>
  308. <span class="meta-sep">|</span>
  309. <span class="comments-link"><a href="http://blog.mfabrik.com/2011/03/15/everyone-loves-and-hates-console-log/#comments" title="Comment on Everyone loves and hates console.log()">1 Comment</a></span>
  310. <span class="meta-sep">|</span> <span class="edit-link"><a class="post-edit-link" href="http://blog.mfabrik.com/wp-admin/post.php?post=1119&amp;action=edit" title="Edit Post">Edit</a></span> </div><!-- .entry-utility -->
  311. </div><!-- #post-## -->
  312. <div id="post-1105" class="post-1105 post type-post status-publish format-standard hentry category-appengine category-python category-technology tag-app-engine tag-data tag-download tag-mirror tag-python tag-remote-api tag-sqlite tag-upload">
  313. <h2 class="entry-title"><a href="http://blog.mfabrik.com/2011/03/14/mirroring-app-engine-production-data-to-development-server-using-appcfg-py/" title="Permalink to Mirroring App Engine production data to development server using appcfg.py" rel="bookmark">Mirroring App Engine production data to development server using appcfg.py</a></h2>
  314. <div class="entry-meta">
  315. <span class="meta-prep meta-prep-author">Posted on</span> <a href="http://blog.mfabrik.com/2011/03/14/mirroring-app-engine-production-data-to-development-server-using-appcfg-py/" title="12:14 am" rel="bookmark"><span class="entry-date">March 14, 2011</span></a> <span class="meta-sep">by</span> <span class="author vcard"><a class="url fn n" href="http://blog.mfabrik.com/author/moo/" title="View all posts by Mikko Ohtamaa">Mikko Ohtamaa</a></span> </div><!-- .entry-meta -->
  316. <div class="entry-content">
  317. <p>Google App Engine provides some <a href="http://code.google.com/appengine/docs/python/tools/uploadingdata.html#Setting_Up_remote_api">remote API functionality out of the box</a>. One of the remote API features is to download data from the production server. After downloading, you can upload the downloaded data to your development server, effectively mirroring the content of the production server to your local development server. This is very useful if you are working on a CMS, sites, etc., where you want to test new layouts or views locally against the old data before putting them into production.</p>
  318. <p>First enable remote API in <em>app.yaml</em>:</p>
  319. <pre>- url: /remote_api
  320.   script: $PYTHON_LIB/google/appengine/ext/remote_api/handler.py
  321.   login: admin</pre>
  322. <blockquote><p>Note: Using the <em>builtins</em> app.yaml directive didn&#8217;t work for me for some reason, so I had to specify the remote API URI manually.</p></blockquote>
  323. <p>After this you should be able <a href="http://code.google.com/appengine/docs/python/tools/uploadingdata.html#Downloading_and_Uploading_All_Data">to download data</a>. Here I am using <em>appcfg.py</em> global installation on OSX. Below is the command and sample output.</p>
  324. <pre>appcfg.py -e yourgoogleaccount@gmail.com download_data --url=http://yourappid.appspot.com/remote_api --filename=data.sqlite3
  325. ...
  326. Downloading data records.
  327. [INFO    ] Logging to bulkloader-log-20110313.222523
  328. [INFO    ] Throttling transfers:
  329. [INFO    ] Bandwidth: 250000 bytes/second
  330. [INFO    ] HTTP connections: 8/second
  331. [INFO    ] Entities inserted/fetched/modified: 20/second
  332. [INFO    ] Batch Size: 10
  333. ...
  334. [INFO    ] Have 1803 entities, 0 previously transferred
  335. [INFO    ] 1803 entities (972883 bytes) transferred in 91.0 seconds</pre>
  336. <p>data.sqlite3 is your production database dump in SQLite 3 binary format (used internally by the development server).</p>
  337. <p>If you have sqlite command line tool installed you can explore around the data dump there:</p>
  338. <pre>sqlite3 data.sqlite
  339. SQLite version 3.7.5
  340. Enter ".help" for instructions
  341. Enter SQL statements terminated with a ";"
  342. sqlite&gt; .tables
  343. Apps                                   your-app!Model1!Entities
  344. IdSeq                                  your-app!Model1!EntitiesByProperty
  345. Namespaces                             your-app!Model2!Entities
  346. bulkloader_database_signature          your-app!Model2!EntitiesByProperty
  347. your-app!!Entities                     result
  348. your-app!!EntitiesByProperty</pre>
  349. <p>Now you can upload data.</p>
  350. <blockquote><p><em>Note: </em>Even though there exists an option --use_sqlite for dev_appserver.py, it looks like it cannot directly use the database file produced by download_data. You cannot just swap database files; you need to upload the downloaded data to the development server.</p></blockquote>
  351. <p>Start your development server:</p>
  352. <pre>dev_appserver.py .</pre>
  353. <p>In another terminal, go to downloaded data.sqlite folder and give the command:</p>
  354. <pre>appcfg.py upload_data --url http://localhost:8080/remote_api --file=data.sqlite --application=yourappid</pre>
  355. <p>It will ask you for credentials, but it seems that any username and password is accepted for the local development server.</p>
  356. <p>Now you can login to your local development server to explore the data:</p>
  357. <pre>http://localhost:8080/_ah/admin</pre>
  358. <p>Ensure your data got copied over using Data Viewer:</p>
  359. <pre>http://localhost:8080/_ah/admin</pre>
  360. <p class="signature">
  361. <a href="http://mfabrik.com/@@zoho-contact-form"><img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/mfabrik-24.png"></a> <a href="http://mfabrik.com/@@zoho-contact-form">Get developers</a> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml"><img valign="middle" src="http://www.feedburner.com/fb/images/pub/feed-icon16x16.png" alt="" style="vertical-align:middle;border:0"/></a> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml">Subscribe mFabrik blog in a reader</a> <a href="http://twitter.com/mfabrik"><img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/twitter-24.png"></a> <a href="http://twitter.com/moo9000">Follow me on Twitter</a></p>
  362. </div><!-- .entry-content -->
  363. <div class="entry-utility">
  364. <span class="cat-links">
  365. <span class="entry-utility-prep entry-utility-prep-cat-links">Posted in</span> <a href="http://blog.mfabrik.com/category/appengine/" title="View all posts in appengine" rel="category tag">appengine</a>, <a href="http://blog.mfabrik.com/category/python/" title="View all posts in python" rel="category tag">python</a>, <a href="http://blog.mfabrik.com/category/technology/" title="View all posts in technology" rel="category tag">technology</a> </span>
  366. <span class="meta-sep">|</span>
  367. <span class="tag-links">
  368. <span class="entry-utility-prep entry-utility-prep-tag-links">Tagged</span> <a href="http://blog.mfabrik.com/tag/app-engine/" rel="tag">app engine</a>, <a href="http://blog.mfabrik.com/tag/data/" rel="tag">data</a>, <a href="http://blog.mfabrik.com/tag/download/" rel="tag">download</a>, <a href="http://blog.mfabrik.com/tag/mirror/" rel="tag">mirror</a>, <a href="http://blog.mfabrik.com/tag/python/" rel="tag">python</a>, <a href="http://blog.mfabrik.com/tag/remote-api/" rel="tag">remote api</a>, <a href="http://blog.mfabrik.com/tag/sqlite/" rel="tag">sqlite</a>, <a href="http://blog.mfabrik.com/tag/upload/" rel="tag">upload</a> </span>
  369. <span class="meta-sep">|</span>
  370. <span class="comments-link"><a href="http://blog.mfabrik.com/2011/03/14/mirroring-app-engine-production-data-to-development-server-using-appcfg-py/#respond" title="Comment on Mirroring App Engine production data to development server using appcfg.py">Leave a comment</a></span>
  371. <span class="meta-sep">|</span> <span class="edit-link"><a class="post-edit-link" href="http://blog.mfabrik.com/wp-admin/post.php?post=1105&amp;action=edit" title="Edit Post">Edit</a></span> </div><!-- .entry-utility -->
  372. </div><!-- #post-## -->
  373. <div id="post-1092" class="post-1092 post type-post status-publish format-standard hentry category-appengine category-python category-technology tag-always-on tag-appengine tag-cloud tag-cloud-computing tag-cron tag-deadlineexceedederror tag-dynamic tag-gaeutilities tag-google tag-http-requiest tag-instance tag-monitoring tag-new-process tag-php tag-python tag-response-time tag-zabbix">
  374. <h2 class="entry-title"><a href="http://blog.mfabrik.com/2011/03/11/google-app-engine-issues-with-dynamic-instances-and-deadlineexceedederrors/" title="Permalink to Google App Engine: issues with dynamic instances and DeadlineExceededErrors" rel="bookmark">Google App Engine: issues with dynamic instances and DeadlineExceededErrors</a></h2>
  375. <div class="entry-meta">
  376. <span class="meta-prep meta-prep-author">Posted on</span> <a href="http://blog.mfabrik.com/2011/03/11/google-app-engine-issues-with-dynamic-instances-and-deadlineexceedederrors/" title="1:24 pm" rel="bookmark"><span class="entry-date">March 11, 2011</span></a> <span class="meta-sep">by</span> <span class="author vcard"><a class="url fn n" href="http://blog.mfabrik.com/author/moo/" title="View all posts by Mikko Ohtamaa">Mikko Ohtamaa</a></span> </div><!-- .entry-meta -->
  377. <div class="entry-content">
  378. <h2>Dynamic instances and processing time</h2>
  379. <p>This Google App Engine feature came to me as a surprise, though it makes perfect sense. Your site is slow if it has low traffic.</p>
  380. <p>Google App Engine runs Python code on instances. By default, instances are dynamic. Instances are shut down if they do not have enough traffic (requests per minute). Thus, when you get individual hits to App Engine only now and then, App Engine must restart your instance for each hit.</p>
  381. <p>When this happens, you see the following in App Engine console logs for every request on low volume traffic:</p>
  382. <pre>This request caused a new process to be started for your application,
  383. and thus caused your application code to be loaded for the first time.</pre>
  384. <p>It is not always ok to add 500 &#8211; 2000 milliseconds processing delay on the top of the normal processing time. Google&#8217;s own recommendation was that each page should be served within 200 milliseconds.</p>
  385. <p>There are three ways to optimize this issue</p>
  386. <ul>
  387. <li>Use App Engine premium feature &#8220;Always on&#8221; 0,30 $ / day which keeps your instance always running</li>
  388. <li>Use cron job or such to keep your instance alive (polling once in a minute seems to do the job)</li>
  389. <li>Optimize your imports and split your code to several modules with light amount of imports, so that start up is fast (modules are imported only once)</li>
  390. </ul>
  391. <p>We are using <a href="http://www.zabbix.com/">Zabbix</a> software to monitor our sites (sidenote: I don&#8217;t recommend Zabbix as the first monitoring software choice as it is very difficult to use and has bad user experience, alienating both sysadmins and developers away from it). This is what we had before optimizations &#8211; App Engine was starting a new process for every request:</p>
  392. <p><a href="http://blog.mfabrik.com/wp-content/uploads/2011/03/zabbix31.png"><img class="alignnone size-full wp-image-1095" title="zabbix3" src="http://blog.mfabrik.com/wp-content/uploads/2011/03/zabbix31.png" alt="" width="791" height="490" /></a></p>
  393. <p>&#8230; and this is output we got after optimizations:</p>
  394. <p><a href="http://blog.mfabrik.com/wp-content/uploads/2011/03/zabbix4.png"><img class="alignnone size-full wp-image-1096" title="zabbix4" src="http://blog.mfabrik.com/wp-content/uploads/2011/03/zabbix4.png" alt="" width="786" height="392" /></a></p>
  395. <p>Here is the corresponding diagram after optimizations from the App Engine dashboard itself. These processing times are without network latency. As far as I know, Google does not expose the endpoints of App Engine hosting, so you don&#8217;t know from which part of the world your responses come. By comparing this diagram to the diagram above, you can see how Internet traffic is affecting your App Engine application.</p>
  396. <p><a href="http://blog.mfabrik.com/wp-content/uploads/2011/03/appengine.png"><img class="alignnone size-full wp-image-1097" title="appengine" src="http://blog.mfabrik.com/wp-content/uploads/2011/03/appengine.png" alt="" width="824" height="280" /></a></p>
  397. <h2>The PITA of dying instances</h2>
  398. <p>For some reason, App Engine instances misbehave sometimes. This causes the HTTP requests to die ungracefully.</p>
  399. <p>Normally it is not a problem, as you lose a few page loads now and then. People are used to &#8220;Internet grade&#8221; service and can hit the refresh button if they have problems opening a page.</p>
  400. <p>However if you are monitoring your site and the site gives an unnecessary alarm in the middle of the night, waking up your bastard operator from Hell, he will be very angry next morning and tell you to migrate the crappy software from unreliable Python / App Engine to more reliable PHP servers <img src='http://blog.mfabrik.com/wp-includes/images/smilies/icon_sad.gif' alt=':(' class='wp-smiley' /> </p>
  401. <p>This is what you see in App Engine logs:</p>
  402. <pre>A serious problem was encountered with the process that handled this request, causing it to exit.
  403. This is likely to cause a new process to be used for the next request to your application.
  404. If you see this message frequently, you may be throwing exceptions during the initialization of your application. (Error code 104)</pre>
  405. <p>After digging in deeper, you see that it is a problem of instantiating a new object in the database, exceeding the 30-second hard limit for processing an HTTP request:</p>
  406. <pre>2011-03-09 05:06:20.794 / 500 30094ms 86cpu_ms 40api_cpu_ms
  407. 0kb Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1;
  408. .NET CLR 2.0.50727),gzip(gfe),gzip(gfe),gzip(gfe)
  409. &lt;class 'google.appengine.runtime.DeadlineExceededError'&gt;:
  410. Traceback (most recent call last):
  411. File "/base/data/home/apps/mfabrikkampagne/1.347249742610459821/main.py", line 494, in main
  412. run_wsgi_app(application)
  413. File "/base/python_runtime/python_lib/versions/1/google/appengine/ext/webapp/util.py", line 97, in run_wsgi_app
  414. run_bare_wsgi_app(add_wsgi_middleware(application))
  415. File "/base/python_runtime/python_lib/versions/1/google/appengine/ext/webapp/util.py", line 115, in run_bare_wsgi_app
  416. result = application(env, _start_response)
  417. File "/base/python_runtime/python_lib/versions/1/google/appengine/ext/webapp/__init__.py", line 515, in __call__
  418. handler.get(*groups)
  419. File "/base/data/home/apps/mfabrikkampagne/1.347249742610459821/main.py", line 296, in get
  420. try: self.session = Session()</pre>
  421. <p>So it looks like there is a temporary hiccup in Google App Engine&#8217;s Data Store (Big Table?). In the example above the error comes from <a href="http://gaeutilities.appspot.com/">gaeutilities</a>&#8217;s Session model, but it could be any other model.</p>
  422. <p><a href="http://code.google.com/intl/fi-FI/appengine/docs/python/runtime.html#The_Request_Timer">It is possible to catch DeadlineExceededError and temporarily work-around it, as shown in App Engine documentation</a>.</p>
  423. <p>The best way to handle this situation is to adjust your monitoring software &#8211; Zabbix in our case. Zabbix allows you to configure triggers so that they don&#8217;t alarm on every bad item state change. Instead, you can use the <em>min()</em> function and trigger the alarm after the trigger condition has failed every time during a monitoring period. Just make sure that the trigger period is at least twice as long as the update interval of your web scenario: this way Zabbix can log at least two item state changes and allow one of them to be a failed one.</p>
  424. <p>For example if</p>
  425. <ul>
  426. <li>Update interval of web scenario is 60 seconds</li>
  427. <li>Trigger function must check minimal failures of 1 during 2*60 seconds + some buffer = 150 seconds.</li>
  428. </ul>
  429. <pre>{xxx.fi:web.test.fail[de.mfabrik.com].min(150)}=1</pre>
  430. <p>This will allow one failed response before triggering the alarm.</p>
  431. <p class="signature">
  432. <a href="http://mfabrik.com/@@zoho-contact-form"><img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/mfabrik-24.png"></a> <a href="http://mfabrik.com/@@zoho-contact-form">Get developers</a> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml"><img valign="middle" src="http://www.feedburner.com/fb/images/pub/feed-icon16x16.png" alt="" style="vertical-align:middle;border:0"/></a> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml">Subscribe mFabrik blog in a reader</a> <a href="http://twitter.com/mfabrik"><img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/twitter-24.png"></a> <a href="http://twitter.com/moo9000">Follow me on Twitter</a></p>
  433. </div><!-- .entry-content -->
  434. <div class="entry-utility">
  435. <span class="cat-links">
  436. <span class="entry-utility-prep entry-utility-prep-cat-links">Posted in</span> <a href="http://blog.mfabrik.com/category/appengine/" title="View all posts in appengine" rel="category tag">appengine</a>, <a href="http://blog.mfabrik.com/category/python/" title="View all posts in python" rel="category tag">python</a>, <a href="http://blog.mfabrik.com/category/technology/" title="View all posts in technology" rel="category tag">technology</a> </span>
  437. <span class="meta-sep">|</span>
  438. <span class="tag-links">
  439. <span class="entry-utility-prep entry-utility-prep-tag-links">Tagged</span> <a href="http://blog.mfabrik.com/tag/always-on/" rel="tag">always on</a>, <a href="http://blog.mfabrik.com/tag/appengine/" rel="tag">appengine</a>, <a href="http://blog.mfabrik.com/tag/cloud/" rel="tag">cloud</a>, <a href="http://blog.mfabrik.com/tag/cloud-computing/" rel="tag">cloud computing</a>, <a href="http://blog.mfabrik.com/tag/cron/" rel="tag">cron</a>, <a href="http://blog.mfabrik.com/tag/deadlineexceedederror/" rel="tag">DeadlineExceededError</a>, <a href="http://blog.mfabrik.com/tag/dynamic/" rel="tag">dynamic</a>, <a href="http://blog.mfabrik.com/tag/gaeutilities/" rel="tag">gaeutilities</a>, <a href="http://blog.mfabrik.com/tag/google/" rel="tag">google</a>, <a href="http://blog.mfabrik.com/tag/http-requiest/" rel="tag">http requiest</a>, <a href="http://blog.mfabrik.com/tag/instance/" rel="tag">instance</a>, <a href="http://blog.mfabrik.com/tag/monitoring/" rel="tag">monitoring</a>, <a href="http://blog.mfabrik.com/tag/new-process/" rel="tag">new process</a>, <a href="http://blog.mfabrik.com/tag/php/" rel="tag">php</a>, <a href="http://blog.mfabrik.com/tag/python/" rel="tag">python</a>, <a href="http://blog.mfabrik.com/tag/response-time/" rel="tag">response time</a>, <a href="http://blog.mfabrik.com/tag/zabbix/" rel="tag">zabbix</a> </span>
  440. <span class="meta-sep">|</span>
  441. <span class="comments-link"><a href="http://blog.mfabrik.com/2011/03/11/google-app-engine-issues-with-dynamic-instances-and-deadlineexceedederrors/#comments" title="Comment on Google App Engine: issues with dynamic instances and DeadlineExceededErrors">1 Comment</a></span>
  442. <span class="meta-sep">|</span> <span class="edit-link"><a class="post-edit-link" href="http://blog.mfabrik.com/wp-admin/post.php?post=1092&amp;action=edit" title="Edit Post">Edit</a></span> </div><!-- .entry-utility -->
  443. </div><!-- #post-## -->
  444. <div id="post-1089" class="post-1089 post type-post status-publish format-standard hentry category-python category-technology tag-cloud tag-ironpython tag-jython tag-microsoft tag-net tag-pypy tag-python tag-python-tools-for-visual-studio">
  445. <h2 class="entry-title"><a href="http://blog.mfabrik.com/2011/03/10/visual-studio-and-microsoft-go-python/" title="Permalink to Visual Studio and Microsoft go Python" rel="bookmark">Visual Studio and Microsoft go Python</a></h2>
  446. <div class="entry-meta">
  447. <span class="meta-prep meta-prep-author">Posted on</span> <a href="http://blog.mfabrik.com/2011/03/10/visual-studio-and-microsoft-go-python/" title="10:11 pm" rel="bookmark"><span class="entry-date">March 10, 2011</span></a> <span class="meta-sep">by</span> <span class="author vcard"><a class="url fn n" href="http://blog.mfabrik.com/author/moo/" title="View all posts by Mikko Ohtamaa">Mikko Ohtamaa</a></span> </div><!-- .entry-meta -->
  448. <div class="entry-content">
  449. <p><a href="http://pytools.codeplex.com/">Microsoft Technical Computing Group has released a beta version for its Python integration for Visual Studio</a>.</p>
  450. <p>This is, indeed, an interesting development, as it clearly shows that Python has reached a new level of programming language maturity. Receiving this much attention from mighty Microsoft means that Python is no longer a mere prospect member in the cabin of enterprise solutions.</p>
  451. <p>Python Tools for Visual Studio are not focused only on Microsoft&#8217;s own .NET run-time: even Jython and PyPy are partially supported, claims the spec sheet. Looks like some kind of cloud integration is on its way &#8211; maybe Microsoft wants to challenge Google App Engine by providing even better cloud development tools?</p>
  452. <p>Also there seems to be more information coming at PyCon&#8230;</p>
  453. <p class="signature">
  454. <a href="http://mfabrik.com/@@zoho-contact-form"><img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/mfabrik-24.png"></a> <a href="http://mfabrik.com/@@zoho-contact-form">Get developers</a> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml"><img valign="middle" src="http://www.feedburner.com/fb/images/pub/feed-icon16x16.png" alt="" style="vertical-align:middle;border:0"/></a> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml">Subscribe mFabrik blog in a reader</a> <a href="http://twitter.com/mfabrik"><img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/twitter-24.png"></a> <a href="http://twitter.com/moo9000">Follow me on Twitter</a></p>
  455. </div><!-- .entry-content -->
  456. <div class="entry-utility">
  457. <span class="cat-links">
  458. <span class="entry-utility-prep entry-utility-prep-cat-links">Posted in</span> <a href="http://blog.mfabrik.com/category/python/" title="View all posts in python" rel="category tag">python</a>, <a href="http://blog.mfabrik.com/category/technology/" title="View all posts in technology" rel="category tag">technology</a> </span>
  459. <span class="meta-sep">|</span>
  460. <span class="tag-links">
  461. <span class="entry-utility-prep entry-utility-prep-tag-links">Tagged</span> <a href="http://blog.mfabrik.com/tag/cloud/" rel="tag">cloud</a>, <a href="http://blog.mfabrik.com/tag/ironpython/" rel="tag">ironpython</a>, <a href="http://blog.mfabrik.com/tag/jython/" rel="tag">jython</a>, <a href="http://blog.mfabrik.com/tag/microsoft/" rel="tag">microsoft</a>, <a href="http://blog.mfabrik.com/tag/net/" rel="tag">net</a>, <a href="http://blog.mfabrik.com/tag/pypy/" rel="tag">pypy</a>, <a href="http://blog.mfabrik.com/tag/python/" rel="tag">python</a>, <a href="http://blog.mfabrik.com/tag/python-tools-for-visual-studio/" rel="tag">python tools for visual studio</a> </span>
  462. <span class="meta-sep">|</span>
  463. <span class="comments-link"><a href="http://blog.mfabrik.com/2011/03/10/visual-studio-and-microsoft-go-python/#respond" title="Comment on Visual Studio and Microsoft go Python">Leave a comment</a></span>
  464. <span class="meta-sep">|</span> <span class="edit-link"><a class="post-edit-link" href="http://blog.mfabrik.com/wp-admin/post.php?post=1089&amp;action=edit" title="Edit Post">Edit</a></span> </div><!-- .entry-utility -->
  465. </div><!-- #post-## -->
  466. <div id="post-1082" class="post-1082 post type-post status-publish format-standard hentry category-technology tag-adapter tag-interface tag-page-template tag-plone tag-portlet tag-render tag-zope">
  467. <h2 class="entry-title"><a href="http://blog.mfabrik.com/2011/03/10/how%c2%a0to-render-a-portlet-in-plone/" title="Permalink to How to render a portlet in Plone" rel="bookmark">How to render a portlet in Plone</a></h2>
  468. <div class="entry-meta">
  469. <span class="meta-prep meta-prep-author">Posted on</span> <a href="http://blog.mfabrik.com/2011/03/10/how%c2%a0to-render-a-portlet-in-plone/" title="6:06 pm" rel="bookmark"><span class="entry-date">March 10, 2011</span></a> <span class="meta-sep">by</span> <span class="author vcard"><a class="url fn n" href="http://blog.mfabrik.com/author/moo/" title="View all posts by Mikko Ohtamaa">Mikko Ohtamaa</a></span> </div><!-- .entry-meta -->
  470. <div class="entry-content">
  471. <p>It&#8217;s easy <img src='http://blog.mfabrik.com/wp-includes/images/smilies/icon_smile.gif' alt=':)' class='wp-smiley' /> It took me only two years to figure this out.</p>
  472. <p>Below is an example how to render a portlet in Plone programmatically. This is useful when you want to have special page layouts and you need to include portlet output from another part of the site.</p>
  473. <ul>
  474. <li>Portlet machinery uses Zope&#8217;s adapter pattern extensively. This allows you to override things based on the content context, HTTP request, etc.</li>
  475. <li>A portlet is assigned to some context in some portlet manager</li>
  476. <li>We can dig these assignments up by portlet assignment id (not user visible) or portlet type (portlet assignment interface)</li>
  477. <li>Each portlet has its own overrideable renderer class</li>
  478. </ul>
  479. <p>This all makes everything flexible, though still not flexible enough for some use cases (blacklisting portlets). The downside is that accessing things through many abstraction layers and plug-in points (adaptions) is a little cumbersome.</p>
  480. <p>Here is sample code for digging up a portlet and calling its renderer:</p>
  481. <pre> import Acquisition
  482. from zope.component import getUtility, getMultiAdapter, queryMultiAdapter
  483. from plone.portlets.interfaces import IPortletRetriever, IPortletManager, IPortletRenderer
  484. def get_portlet_manager(column):
  485. """ Return one of default Plone portlet managers.
  486. @param column: "plone.leftcolumn" or "plone.rightcolumn"
  487. @return: plone.portlets.interfaces.IPortletManagerRenderer instance
  488. """
  489. manager = getUtility(IPortletManager, name=column)
  490. return manager
  491. def render_portlet(context, request, view, manager, interface):
  492. """ Render a portlet defined in external location.
  493. .. note ::
  494. Portlets can be idenfied by id (not user visible)
  495. or interface (portlet class). This method supports look up
  496. by interface and will return the first matching portlet with this interface.
  497. @param context: Content item reference where portlet appear
  498. @param manager: IPortletManagerRenderer instance
  499. @param view: Current view or None if not available
  500. @param interface: Marker interface class we use to identify the portlet. E.g. IFacebookPortlet
  501. @return: Rendered portlet HTML as a string, or empty string if portlet not found
  502. """
  503. retriever = getMultiAdapter((context, manager), IPortletRetriever)
  504. portlets = retriever.getPortlets()
  505. assignment = None
  506. for portlet in portlets:
  507. # portlet is {'category': 'context', 'assignment': , 'name': u'facebook-like-box', 'key': '/isleofback/sisalto/huvit-ja-harrasteet
  508. # Identify portlet by interface provided by assignment
  509. if interface.providedBy(portlet["assignment"]):
  510. assignment = portlet["assignment"]
  511. break
  512. if assignment is None:
  513. # Did not find a portlet
  514. return ""
  515. #- A special type of content provider, IPortletRenderer, knows how to render each
  516. #type of portlet. The IPortletRenderer should be a multi-adapter from
  517. #(context, request, view, portlet manager, data provider).
  518. renderer = queryMultiAdapter((context, request, view, manager, assignment), IPortletRenderer)
  519. # Make sure we have working acquisition chain
  520. renderer = renderer.__of__(context)
  521. if renderer is None:
  522. raise RuntimeError("No portlet renderer found for portlet assignment:" + str(assignment))
  523. renderer.update()
  524. # Does not check visibility here... force render always
  525. html = renderer.render()
  526. return html</pre>
  527. <p>This is how you integrate it to your view class:</p>
  528. <pre> def render_slope_info(self):
  529. """ Render a portlet from another page in-line to this page
  530. Does not render other portlets in the same portlet manager.
  531. """
  532. context = self.context.aq_inner
  533. request = self.request
  534. view = self
  535. column = "isleofback.app.frontpageportlets"
  536. # Our custom interface marking a portlet
  537. from isleofback.app.portlets.slopeinfo import ISlopeInfo
  538. manager = get_portlet_manager(column)
  539. html = render_portlet(context, request, view, manager, ISlopeInfo)
  540. return html</pre>
  541. <p>&#8230;and this is how you call your view helper method from TAL page template:</p>
  542. <pre> &lt;div tal:replace="structure view/render_slope_info" /&gt;</pre>
  543. <p class="signature">
  544. <a href="http://mfabrik.com/@@zoho-contact-form"><img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/mfabrik-24.png"></a> <a href="http://mfabrik.com/@@zoho-contact-form">Get developers</a> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml"><img valign="middle" src="http://www.feedburner.com/fb/images/pub/feed-icon16x16.png" alt="" style="vertical-align:middle;border:0"/></a> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml">Subscribe mFabrik blog in a reader</a> <a href="http://twitter.com/mfabrik"><img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/twitter-24.png"></a> <a href="http://twitter.com/moo9000">Follow me on Twitter</a></p>
  545. </div><!-- .entry-content -->
  546. <div class="entry-utility">
  547. <span class="cat-links">
  548. <span class="entry-utility-prep entry-utility-prep-cat-links">Posted in</span> <a href="http://blog.mfabrik.com/category/technology/" title="View all posts in technology" rel="category tag">technology</a> </span>
  549. <span class="meta-sep">|</span>
  550. <span class="tag-links">
  551. <span class="entry-utility-prep entry-utility-prep-tag-links">Tagged</span> <a href="http://blog.mfabrik.com/tag/adapter/" rel="tag">adapter</a>, <a href="http://blog.mfabrik.com/tag/interface/" rel="tag">interface</a>, <a href="http://blog.mfabrik.com/tag/page-template/" rel="tag">page template</a>, <a href="http://blog.mfabrik.com/tag/plone/" rel="tag">plone</a>, <a href="http://blog.mfabrik.com/tag/portlet/" rel="tag">portlet</a>, <a href="http://blog.mfabrik.com/tag/render/" rel="tag">render</a>, <a href="http://blog.mfabrik.com/tag/zope/" rel="tag">zope</a> </span>
  552. <span class="meta-sep">|</span>
  553. <span class="comments-link"><a href="http://blog.mfabrik.com/2011/03/10/how%c2%a0to-render-a-portlet-in-plone/#respond" title="Comment on How to render a portlet in Plone">Leave a comment</a></span>
  554. <span class="meta-sep">|</span> <span class="edit-link"><a class="post-edit-link" href="http://blog.mfabrik.com/wp-admin/post.php?post=1082&amp;action=edit" title="Edit Post">Edit</a></span> </div><!-- .entry-utility -->
  555. </div><!-- #post-## -->
  556. <div id="post-1078" class="post-1078 post type-post status-publish format-standard hentry category-plone category-technology tag-ajax tag-comments tag-discussion tag-disqus tag-javascript tag-jquery tag-lazy-load tag-plone tag-static tag-varnish">
  557. <h2 class="entry-title"><a href="http://blog.mfabrik.com/2011/03/09/lazily-load-elements-becoming-visible-using-jquery/" title="Permalink to Lazily load elements becoming visible using jQuery" rel="bookmark">Lazily load elements becoming visible using jQuery</a></h2>
  558. <div class="entry-meta">
  559. <span class="meta-prep meta-prep-author">Posted on</span> <a href="http://blog.mfabrik.com/2011/03/09/lazily-load-elements-becoming-visible-using-jquery/" title="12:57 pm" rel="bookmark"><span class="entry-date">March 9, 2011</span></a> <span class="meta-sep">by</span> <span class="author vcard"><a class="url fn n" href="http://blog.mfabrik.com/author/moo/" title="View all posts by Mikko Ohtamaa">Mikko Ohtamaa</a></span> </div><!-- .entry-meta -->
  560. <div class="entry-content">
  561. <p>It is a useful trick to lazily load comments or similar elements at the bottom of the page. Some elements may be loaded only when they are scrolled into view.</p>
  562. <ul>
  563. <li>Not all users are interested in the information, and they do not necessarily read the article long enough to see it</li>
  564. <li>By lazily loading such elements one can speed up the initial page load time</li>
  565. <li>You save bandwidth</li>
  566. <li>If you use AJAX for the dynamic elements of the page you can more easily cache your pages in static page cache (Varnish) even if the pages contain personalized bits</li>
  567. </ul>
  568. <p>For example, Disqus is doing this (see <a href="http://api.jquery.com/jQuery.ajax/">comments in jQuery API documentation</a>).</p>
  569. <p>You can achieve this with <a href="http://remysharp.com/2009/01/26/element-in-view-event-plugin/">in-view plug-in for jQuery</a>.</p>
  570. <p>Below is an example for Plone triggering <span style="text-decoration: underline;"><em>productappreciation_view</em></span> loading when our placeholder <em>div</em> tag becomes visible.</p>
  571. <pre>...
  572. &lt;head&gt;
  573. &lt;script type="text/javascript" tal:attributes="src string:${portal_url}/++resource++your.app/in-view.js"&gt;&lt;/script&gt;
  574. &lt;/head&gt;
  575. ...
  576. &lt;div id="comment-placeholder"&gt;
  577. &lt;!-- Display spinning AJAX indicator gif until our AJAX call completes --&gt;
  578. &lt;p&gt;
  579. &lt;!-- Image is in Products.CMFPlone/skins/plone_images --&gt;
  580. &lt;img tal:attributes="src string:${context/@@plone_portal_state/portal_url}/spinner.gif" /&gt; Loading comments
  581. &lt;/p&gt;
  582. &lt;!-- Hidden link to a view URL which will render the view containing the snippet for comments --&gt;                       
  583. &lt;a rel="nofollow" style="display:none" tal:attributes="href string:${context/absolute_url}/productappreciation_view" /&gt;
  584. &lt;script&gt;
  585. jq(document).ready(function() {
  586. // http://remysharp.com/2009/01/26/element-in-view-event-plugin/                                        
  587. jq("#comment-placeholder").bind("inview", function() {
  588. // This function is executed when the placeholder becomes visible
  589. // Extract URL from HTML page
  590. var commentURL = jq("#comment-placeholder a").attr("href");
  591. if (commentURL) {
  592. // Trigger AJAX call
  593. jq("#comment-placeholder").load(commentURL);
  594. }
  595. });                                     
  596. });     
  597. &lt;/script&gt;
  598. &lt;/div&gt;</pre>
  599. <p class="signature">
  600. <a href="http://mfabrik.com/@@zoho-contact-form"><img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/mfabrik-24.png"></a> <a href="http://mfabrik.com/@@zoho-contact-form">Get developers</a> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml"><img valign="middle" src="http://www.feedburner.com/fb/images/pub/feed-icon16x16.png" alt="" style="vertical-align:middle;border:0"/></a> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml">Subscribe mFabrik blog in a reader</a> <a href="http://twitter.com/mfabrik"><img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/twitter-24.png"></a> <a href="http://twitter.com/moo9000">Follow me on Twitter</a></p>
  601. </div><!-- .entry-content -->
  602. <div class="entry-utility">
  603. <span class="cat-links">
  604. <span class="entry-utility-prep entry-utility-prep-cat-links">Posted in</span> <a href="http://blog.mfabrik.com/category/plone/" title="View all posts in plone" rel="category tag">plone</a>, <a href="http://blog.mfabrik.com/category/technology/" title="View all posts in technology" rel="category tag">technology</a> </span>
  605. <span class="meta-sep">|</span>
  606. <span class="tag-links">
  607. <span class="entry-utility-prep entry-utility-prep-tag-links">Tagged</span> <a href="http://blog.mfabrik.com/tag/ajax/" rel="tag">ajax</a>, <a href="http://blog.mfabrik.com/tag/comments/" rel="tag">comments</a>, <a href="http://blog.mfabrik.com/tag/discussion/" rel="tag">discussion</a>, <a href="http://blog.mfabrik.com/tag/disqus/" rel="tag">disqus</a>, <a href="http://blog.mfabrik.com/tag/javascript/" rel="tag">javascript</a>, <a href="http://blog.mfabrik.com/tag/jquery/" rel="tag">jquery</a>, <a href="http://blog.mfabrik.com/tag/lazy-load/" rel="tag">lazy load</a>, <a href="http://blog.mfabrik.com/tag/plone/" rel="tag">plone</a>, <a href="http://blog.mfabrik.com/tag/static/" rel="tag">static</a>, <a href="http://blog.mfabrik.com/tag/varnish/" rel="tag">varnish</a> </span>
  608. <span class="meta-sep">|</span>
  609. <span class="comments-link"><a href="http://blog.mfabrik.com/2011/03/09/lazily-load-elements-becoming-visible-using-jquery/#comments" title="Comment on Lazily load elements becoming visible using jQuery">4 Comments</a></span>
  610. <span class="meta-sep">|</span> <span class="edit-link"><a class="post-edit-link" href="http://blog.mfabrik.com/wp-admin/post.php?post=1078&amp;action=edit" title="Edit Post">Edit</a></span> </div><!-- .entry-utility -->
  611. </div><!-- #post-## -->
  612. <div id="post-1073" class="post-1073 post type-post status-publish format-standard hentry category-python category-technology tag-crawl tag-find tag-follow tag-full-text tag-full-text-search tag-pdf tag-pypdf tag-python tag-scrape tag-scrapy tag-search tag-trademark tag-user-policy tag-violation tag-web-crawler">
  613. <h2 class="entry-title"><a href="http://blog.mfabrik.com/2011/03/08/installing-and-using-scrapy-web-crawler-to-search-text-on-multiple-sites/" title="Permalink to Installing and using Scrapy web crawler to search text on multiple sites" rel="bookmark">Installing and using Scrapy web crawler to search text on multiple sites</a></h2>
  614. <div class="entry-meta">
  615. <span class="meta-prep meta-prep-author">Posted on</span> <a href="http://blog.mfabrik.com/2011/03/08/installing-and-using-scrapy-web-crawler-to-search-text-on-multiple-sites/" title="4:37 pm" rel="bookmark"><span class="entry-date">March 8, 2011</span></a> <span class="meta-sep">by</span> <span class="author vcard"><a class="url fn n" href="http://blog.mfabrik.com/author/moo/" title="View all posts by Mikko Ohtamaa">Mikko Ohtamaa</a></span> </div><!-- .entry-meta -->
  616. <div class="entry-content">
  617. <p>Here is a little script to use <a href="http://scrapy.org/">Scrapy</a>, a web crawling framework for Python, to search sites for references for certain texts including link content and PDFs. This is handy for cases where you need to find links violating the user policy,  trademarks which are not allowed or just to see where your template output is being used.  Our Scrapy example differs from a normal search engine as it does HTML source code level checking: you can also search for CSS classes, link targets and other elements which may be invisible for normal search engines.</p>
  618. <p>Scrapy comes with a command-line tool and project skeleton generator. You need to generate your own Scrapy project, to which you can then add your own spider classes.</p>
  619. <p>Install Scrapy using Distribute (or setuptools):</p>
  620. <pre>easy_install Scrapy</pre>
  621. <p>Create project code skeleton:</p>
  622. <pre>scrapy startproject myscraper</pre>
  623. <p>Add your spider class skeleton by creating a file <em>myscraper/spiders/spiders.py</em>:</p>
  624. <pre>from scrapy.contrib.spiders import CrawlSpider, Rule
  625. from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
  626. class MySpider(CrawlSpider):
  627. """ Crawl through web sites you specify """
  628. name = "mycrawler"
  629. # Stay within these domains when crawling
  630. allowed_domains = ["www.mysite.com"]
  631. start_urls = [
  632. "http://www.mysite.com/",
  633. ]
  634. # Add our callback which will be called for every found link
  635. rules = [
  636. Rule(SgmlLinkExtractor(), follow=True)
  637. ]</pre>
  638. <p>Start Scrapy to test that it&#8217;s crawling properly. Run the following in the top-level directory:</p>
  639. <pre>scrapy crawl mycrawler</pre>
  640. <p>You should see output like:</p>
  641. <pre>2011-03-08 15:25:52+0200 [scrapy] INFO: Scrapy 0.12.0.2538 started (bot: myscraper)
  642. 2011-03-08 15:25:52+0200 [scrapy] DEBUG: Enabled extensions: TelnetConsole, SpiderContext, WebService, CoreStats, MemoryUsage, CloseSpider
  643. 2011-03-08 15:25:52+0200 [scrapy] DEBUG: Enabled scheduler middlewares: DuplicatesFilterMiddleware</pre>
  644. <p>You can hit CTRL+C to interrupt scrapy.</p>
  645. <p>Then let&#8217;s enhance the spider a bit to search for blacklisted tags, with optional whitelisting, in myscraper/spiders/spiders.py. We also use the <a href="http://pybrary.net/pyPdf/">pyPdf</a> library to crawl inside PDF files:</p>
  646. <pre>"""
  647. A sample crawler for seeking a text on sites.
  648. """
  649. import StringIO
  650. from functools import partial
  651. from scrapy.http import Request
  652. from scrapy.spider import BaseSpider
  653. from scrapy.contrib.spiders import CrawlSpider, Rule
  654. from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
  655. from scrapy.item import Item
  656. def find_all_substrings(string, sub):
  657. """
  658. http://code.activestate.com/recipes/499314-find-all-indices-of-a-substring-in-a-given-string/
  659. """
  660. import re
  661. starts = [match.start() for match in re.finditer(re.escape(sub), string)]
  662. return starts
  663. class MySpider(CrawlSpider):
  664. """ Crawl through web sites you specify """
  665. name = "mycrawler"
  666. # Stay within these domains when crawling
  667. allowed_domains = ["www.mysite.com", "www.mysite2.com", "intranet.mysite.com"]
  668. start_urls = [
  669. "http://www.mysite.com/",
  670. "http://www.mysite2.com/",
  671. "http://intranet.mysite.com/"
  672. ]
  673. # Add our callback which will be called for every found link
  674. rules = [
  675. Rule(SgmlLinkExtractor(), follow=True, callback="check_violations")
  676. ]
  677. # How many pages crawled? XXX: Was not sure if CrawlSpider is a singleton class
  678. crawl_count = 0
  679. # How many text matches we have found
  680. violations = 0
  681. def get_pdf_text(self, response):
  682. """ Peek inside PDF to check possible violations.
  683. @return: PDF content as searchable plain-text string
  684. """
  685. try:
  686. from pyPdf import PdfFileReader
  687. except ImportError:
  688. print "Needed: easy_install pyPdf"
  689. raise
  690. stream = StringIO.StringIO(response.body)
  691. reader = PdfFileReader(stream)
  692. text = u""
  693. if reader.getDocumentInfo().title:
  694. # Title is optional, may be None
  695. text += reader.getDocumentInfo().title
  696. for page in reader.pages:
  697. # XXX: Does this handle unicode properly?
  698. text += page.extractText()
  699. return text
  700. def check_violations(self, response):
  701. """ Check a server response page (file) for possible violations """
  702. # Do some user visible status reporting
  703. self.__class__.crawl_count += 1
  704. crawl_count = self.__class__.crawl_count
  705. if crawl_count % 100 == 0:
  706. # Print some progress output
  707. print "Crawled %d pages" % crawl_count
  708. # Entries which are not allowed to appear in content.
  709. # These are case-sensitive
  710. blacklist = ["meat", "ham" ]
  711. # Entries which are allowed to appear. They are usually
  712. # non-human visible data, like CSS classes, and may not be interesting business wise
  713. exceptions_after = [ "meatball",
  714. "hamming",
  715. "hamburg"
  716. ]
  717. # These are strings with preceding text in which our match is allowed
  718. exceptions_before = [
  719. "bushmeat",
  720. "honeybaked ham"
  721. ]
  722. url = response.url
  723. # Check response content type to identify what kind of payload this link target is
  724. ct = response.headers.get("content-type", "").lower()
  725. if "pdf" in ct:
  726. # Assume a PDF file
  727. data = self.get_pdf_text(response)
  728. else:
  729. # Assume it's HTML
  730. data = response.body
  731. # Go through our search goals to identify any "bad" text on the page
  732. for tag in blacklist:
  733. substrings = find_all_substrings(data, tag)
  734. # Check entries against the exception list for "allowed" special cases
  735. for pos in substrings:
  736. ok = False
  737. for exception in exceptions_after:
  738. sample = data[pos:pos+len(exception)]
  739. if sample == exception:
  740. #print "Was whitelisted special case:" + sample
  741. ok = True
  742. break
  743. for exception in exceptions_before:
  744. sample = data[pos - len(exception) + len(tag): pos+len(tag) ]
  745. #print "For %s got sample %s" % (exception, sample)
  746. if sample == exception:
  747. #print "Was whitelisted special case:" + sample
  748. ok = True
  749. break
  750. if not ok:
  751. self.__class__.violations += 1
  752. print "Violation number %d" % self.__class__.violations
  753. print "URL %s" % url
  754. print "Violating text:" + tag
  755. print "Position:" + str(pos)
  756. piece = data[pos-40:pos+40].encode("utf-8")
  757. print "Sample text around position:" + piece.replace("\n", " ")
  758. print "------"
  759. # We are not actually storing any data, return dummy item
  760. return Item()
  761. def _requests_to_follow(self, response):
  762. if getattr(response, "encoding", None) != None:
  763. # Server does not set encoding for binary files
  764. # Do not try to follow links in
  765. # binary data, as this will break Scrapy
  766. return CrawlSpider._requests_to_follow(self, response)
  767. else:
  768. return []</pre>
  769. <p>Let&#8217;s tune down the logging output level, so we get only relevant data in the output. In <em>myscraper/settings.py</em> add:</p>
  770. <pre>LOG_LEVEL="INFO"</pre>
  771. <p>Now you can run the crawler and pipe the output to a text file:</p>
  772. <pre>scrapy crawl mycrawler &gt; violations.txt</pre>
  773. <p>More information</p>
  774. <ul>
  775. <li><a href="http://doc.scrapy.org/intro/install.html#intro-install-easy">Scrapy manual</a></li>
  776. </ul>
  777. <p class="signature">
  778. <a href="http://mfabrik.com/@@zoho-contact-form"><img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/mfabrik-24.png"></a> <a href="http://mfabrik.com/@@zoho-contact-form">Get developers</a> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml"><img valign="middle" src="http://www.feedburner.com/fb/images/pub/feed-icon16x16.png" alt="" style="vertical-align:middle;border:0"/></a> <a href="http://feeds.feedburner.com/mFabrikWebAndMobileDevelopment" rel="alternate" type="application/rss+xml">Subscribe mFabrik blog in a reader</a> <a href="http://twitter.com/mfabrik"><img valign="middle" src="http://blog.mfabrik.com/wp-content/uploads/twitter-24.png"></a> <a href="http://twitter.com/moo9000">Follow me on Twitter</a></p>
  779. </div><!-- .entry-content -->
  780. <div class="entry-utility">
  781. <span class="cat-links">
  782. <span class="entry-utility-prep entry-utility-prep-cat-links">Posted in</span> <a href="http://blog.mfabrik.com/category/python/" title="View all posts in python" rel="category tag">python</a>, <a href="http://blog.mfabrik.com/category/technology/" title="View all posts in technology" rel="category tag">technology</a> </span>
  783. <span class="meta-sep">|</span>
  784. <span class="tag-links">
  785. <span class="entry-utility-prep entry-utility-prep-tag-links">Tagged</span> <a href="http://blog.mfabrik.com/tag/crawl/" rel="tag">crawl</a>, <a href="http://blog.mfabrik.com/tag/find/" rel="tag">find</a>, <a href="http://blog.mfabrik.com/tag/follow/" rel="tag">follow</a>, <a href="http://blog.mfabrik.com/tag/full-text/" rel="tag">full text</a>, <a href="http://blog.mfabrik.com/tag/full-text-search/" rel="tag">full text search</a>, <a href="http://blog.mfabrik.com/tag/pdf/" rel="tag">pdf</a>, <a href="http://blog.mfabrik.com/tag/pypdf/" rel="tag">pypdf</a>, <a href="http://blog.mfabrik.com/tag/python/" rel="tag">python</a>, <a href="http://blog.mfabrik.com/tag/scrape/" rel="tag">scrape</a>, <a href="http://blog.mfabrik.com/tag/scrapy/" rel="tag">scrapy</a>, <a href="http://blog.mfabrik.com/tag/search/" rel="tag">search</a>, <a href="http://blog.mfabrik.com/tag/trademark/" rel="tag">trademark</a>, <a href="http://blog.mfabrik.com/tag/user-policy/" rel="tag">user policy</a>, <a href="http://blog.mfabrik.com/tag/violation/" rel="tag">violation</a>, <a href="http://blog.mfabrik.com/tag/web-crawler/" rel="tag">web crawler</a> </span>
  786. <span class="meta-sep">|</span>
  787. <span class="comments-link"><a href="http://blog.mfabrik.com/2011/03/08/installing-and-using-scrapy-web-crawler-to-search-text-on-multiple-sites/#respond" title="Comment on Installing and using Scrapy web crawler to search text on multiple sites">Leave a comment</a></span>
  788. <span class="meta-sep">|</span> <span class="edit-link"><a class="post-edit-link" href="http://blog.mfabrik.com/wp-admin/post.php?post=1073&amp;action=edit" title="Edit Post">Edit</a></span> </div><!-- .entry-utility -->
  789. </div><!-- #post-## -->
  790. <div id="nav-below" class="navigation">
  791. <div class="nav-previous"><a href="http://blog.mfabrik.com/page/2/" ><span class="meta-nav">&larr;</span> Older posts</a></div>
  792. <div class="nav-next"></div>
  793. </div><!-- #nav-below -->
  794. </div><!-- #content -->
  795. </div><!-- #container -->
  796. <div id="primary" class="widget-area" role="complementary">
  797. <ul class="xoxo">
  798. <li id="search" class="widget-container widget_search">
  799. <form role="search" method="get" id="searchform" action="http://blog.mfabrik.com/" >
  800. <div><label class="screen-reader-text" for="s">Search for:</label>
  801. <input type="text" value="" name="s" id="s" />
  802. <input type="submit" id="searchsubmit" value="Search" />
  803. </div>
  804. </form> </li>
  805. <li id="archives" class="widget-container">
  806. <h3 class="widget-title">Archives</h3>
  807. <ul>
  808. <li><a href='http://blog.mfabrik.com/2011/03/' title='March 2011'>March 2011</a></li>
  809. <li><a href='http://blog.mfabrik.com/2011/02/' title='February 2011'>February 2011</a></li>
  810. <li><a href='http://blog.mfabrik.com/2011/01/' title='January 2011'>January 2011</a></li>
  811. <li><a href='http://blog.mfabrik.com/2010/12/' title='December 2010'>December 2010</a></li>
  812. <li><a href='http://blog.mfabrik.com/2010/11/' title='November 2010'>November 2010</a></li>
  813. <li><a href='http://blog.mfabrik.com/2010/10/' title='October 2010'>October 2010</a></li>
  814. <li><a href='http://blog.mfabrik.com/2010/09/' title='September 2010'>September 2010</a></li>
  815. <li><a href='http://blog.mfabrik.com/2010/08/' title='August 2010'>August 2010</a></li>
  816. <li><a href='http://blog.mfabrik.com/2010/07/' title='July 2010'>July 2010</a></li>
  817. <li><a href='http://blog.mfabrik.com/2010/06/' title='June 2010'>June 2010</a></li>
  818. <li><a href='http://blog.mfabrik.com/2010/05/' title='May 2010'>May 2010</a></li>
  819. <li><a href='http://blog.mfabrik.com/2010/04/' title='April 2010'>April 2010</a></li>
  820. <li><a href='http://blog.mfabrik.com/2010/03/' title='March 2010'>March 2010</a></li>
  821. <li><a href='http://blog.mfabrik.com/2010/01/' title='January 2010'>January 2010</a></li>
  822. <li><a href='http://blog.mfabrik.com/2009/12/' title='December 2009'>December 2009</a></li>
  823. <li><a href='http://blog.mfabrik.com/2009/11/' title='November 2009'>November 2009</a></li>
  824. <li><a href='http://blog.mfabrik.com/2009/10/' title='October 2009'>October 2009</a></li>
  825. <li><a href='http://blog.mfabrik.com/2009/09/' title='September 2009'>September 2009</a></li>
  826. <li><a href='http://blog.mfabrik.com/2009/08/' title='August 2009'>August 2009</a></li>
  827. <li><a href='http://blog.mfabrik.com/2009/07/' title='July 2009'>July 2009</a></li>
  828. <li><a href='http://blog.mfabrik.com/2009/02/' title='February 2009'>February 2009</a></li>
  829. <li><a href='http://blog.mfabrik.com/2008/12/' title='December 2008'>December 2008</a></li>
  830. <li><a href='http://blog.mfabrik.com/2008/11/' title='November 2008'>November 2008</a></li>
  831. <li><a href='http://blog.mfabrik.com/2008/10/' title='October 2008'>October 2008</a></li>
  832. <li><a href='http://blog.mfabrik.com/2008/09/' title='September 2008'>September 2008</a></li>
  833. <li><a href='http://blog.mfabrik.com/2008/08/' title='August 2008'>August 2008</a></li>
  834. <li><a href='http://blog.mfabrik.com/2008/07/' title='July 2008'>July 2008</a></li>
  835. <li><a href='http://blog.mfabrik.com/2008/06/' title='June 2008'>June 2008</a></li>
  836. <li><a href='http://blog.mfabrik.com/2008/05/' title='May 2008'>May 2008</a></li>
  837. <li><a href='http://blog.mfabrik.com/2008/04/' title='April 2008'>April 2008</a></li>
  838. <li><a href='http://blog.mfabrik.com/2008/03/' title='March 2008'>March 2008</a></li>
  839. <li><a href='http://blog.mfabrik.com/2008/02/' title='February 2008'>February 2008</a></li>
  840. <li><a href='http://blog.mfabrik.com/2007/11/' title='November 2007'>November 2007</a></li>
  841. <li><a href='http://blog.mfabrik.com/2007/10/' title='October 2007'>October 2007</a></li>
  842. <li><a href='http://blog.mfabrik.com/2007/09/' title='September 2007'>September 2007</a></li>
  843. <li><a href='http://blog.mfabrik.com/2007/08/' title='August 2007'>August 2007</a></li>
  844. <li><a href='http://blog.mfabrik.com/2007/07/' title='July 2007'>July 2007</a></li>
  845. </ul>
  846. </li>
  847. <li id="meta" class="widget-container">
  848. <h3 class="widget-title">Meta</h3>
  849. <ul>
  850. <li><a href="http://blog.mfabrik.com/wp-admin/">Site Admin</a></li> <li><a href="http://blog.mfabrik.com/wp-login.php?action=logout&#038;_wpnonce=c5a860867b">Log out</a></li>
  851. </ul>
  852. </li>
  853. </ul>
  854. </div><!-- #primary .widget-area -->
  855. </div><!-- #main -->
  856. <div id="footer" role="contentinfo">
  857. <div id="colophon">
  858. <div id="site-info">
  859. <a href="http://blog.mfabrik.com/" title="mFabrik &#8211; mobile sites, apps, HTML5 and CMS software development" rel="home">
  860. mFabrik &#8211; mobile sites, apps, HTML5 and CMS software development </a>
  861. </div><!-- #site-info -->
  862. <div id="site-generator">
  863. <a href="http://wordpress.org/" title="Semantic Personal Publishing Platform" rel="generator">Proudly powered by WordPress.</a>
  864. </div><!-- #site-generator -->
  865. </div><!-- #colophon -->
  866. </div><!-- #footer -->
  867. </div><!-- #wrapper -->
  868. <!-- tracker not added by Ultimate Google Analytics plugin v1.5.3: http://www.oratransplant.nl/uga -->
  869. <!-- tracker is not added for a logged on user of this level -->
  870. <!-- SyntaxHighlighter Start -->
  871. <script type="text/javascript" src="http://blog.mfabrik.com/wp-content/plugins/syntax/Scripts/shCore.js"></script>
  872. <script type="text/javascript" src="http://blog.mfabrik.com/wp-content/plugins/syntax/Scripts/shBrushPython.js"></script>
  873. <script type="text/javascript">dp.SyntaxHighlighter.HighlightAll('code');</script>
  874. <!-- SyntaxHighlighter End -->
  875. </body>
  876. </html>