/modules/mod_twitter/mod_twitter.erl

https://code.google.com/p/zotonic/ · Erlang · 448 lines · 262 code · 70 blank · 116 comment · 11 complexity · b9b3d88504d945cc1a38c22b522e8e8a MD5 · raw file

  1. %% @author Arjan Scherpenisse <arjan@scherpenisse.net>
  2. %% @copyright 2009 Arjan Scherpenisse
  3. %% Date: 2009-12-10
  4. %% @doc Follow users on Twitter using the streaming HTTP API.
  5. %%
  6. %% Setup instructions:
  7. %% * Enable the mod_twitter module
  8. %% * Put your login/password in the config keys mod_twitter.api_login
  9. %% and mod_twitter.api_password, respectively.
  10. %% * Create a person in the Zotonic database, find a twitter ID on
  11. %% twitter, and put it in the person record on the admin edit page
  12. %% (sidebar)
  13. %% * The module will start automatically to follow the users which have a twitter id set.
  14. %% Copyright 2009 Arjan Scherpenisse
  15. %%
  16. %% Licensed under the Apache License, Version 2.0 (the "License");
  17. %% you may not use this file except in compliance with the License.
  18. %% You may obtain a copy of the License at
  19. %%
  20. %% http://www.apache.org/licenses/LICENSE-2.0
  21. %%
  22. %% Unless required by applicable law or agreed to in writing, software
  23. %% distributed under the License is distributed on an "AS IS" BASIS,
  24. %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. %% See the License for the specific language governing permissions and
  26. %% limitations under the License.
  27. -module(mod_twitter).
  28. -author("Arjan Scherpenisse <arjan@scherpenisse.net>").
  29. -behaviour(gen_server).
  30. -mod_title("Twitter").
  31. -mod_description("Follow persons from Zotonic on Twitter using the streaming API.").
  32. -mod_prio(200).
  33. %% gen_server exports
  34. -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).
  35. -export([start_link/1]).
  36. %% interface functions
  37. -export([
  38. datamodel/0,
  39. fetch/4,
  40. observe_rsc_update_done/2,
  41. receive_chunk/2
  42. ]).
  43. -include_lib("zotonic.hrl").
  44. -record(state, {context, twitter_pid=undefined}).
  %% @doc Observer for rsc_update_done: keeps the twitter_id identity of the
  %% updated resource in step with its twitter_id property, and broadcasts
  %% restart_twitter when the set of followed ids changed.
  %% Returns ok when the resource has no twitter_id property or when no
  %% restart is needed; otherwise the result of z_notifier:notify/2.
  observe_rsc_update_done(#rsc_update_done{id=RscId}, Context) ->
      case m_rsc:p(RscId, twitter_id, Context) of
          undefined ->
              ok;
          NewKey ->
              %% An empty string/binary counts as "no twitter id".
              HasNewKey = not lists:member(NewKey, [[], <<>>, undefined]),
              MustRestart =
                  case m_identity:get_rsc(RscId, twitter_id, Context) of
                      Identity when is_list(Identity) ->
                          case proplists:get_value(key, Identity) of
                              NewKey ->
                                  %% Stored identity already matches: nothing changed.
                                  false;
                              _StaleKey ->
                                  %% Drop the outdated identity; a new one is inserted below.
                                  m_identity:delete(proplists:get_value(id, Identity), Context),
                                  true
                          end;
                      _NoIdentity ->
                          HasNewKey
                  end,
              case HasNewKey of
                  true -> m_identity:insert(RscId, twitter_id, NewKey, Context);
                  false -> ignore
              end,
              case MustRestart of
                  true -> z_notifier:notify(restart_twitter, Context);
                  false -> ok
              end
      end.
  76. %%====================================================================
  77. %% API
  78. %%====================================================================
  %% @spec start_link(Args) -> {ok,Pid} | ignore | {error,Error}
  %% @doc Starts an unregistered mod_twitter gen_server linked to the caller.
  %% Args must be a list; init/1 looks up the {context, Context} entry in it.
  start_link(Options) when is_list(Options) ->
      GenServerOptions = [],
      gen_server:start_link(?MODULE, Options, GenServerOptions).
  83. %%====================================================================
  84. %% gen_server callbacks
  85. %%====================================================================
  %% @spec init(Args) -> {ok, State} |
  %% {ok, State, Timeout} |
  %% ignore |
  %% {stop, Reason}
  %% @doc Initiates the server. The datamodel is installed before the server is started.
  %% Args must contain a {context, Context} entry. Exits are trapped so that
  %% the death of the linked fetch process arrives as an 'EXIT' message in
  %% handle_info/2. Returns ignore (after broadcasting an error) when no
  %% login configuration is found.
  init(Args) ->
      process_flag(trap_exit, true),
      {context, Context} = proplists:lookup(context, Args),
      handle_author_edges_upgrade(Context),
      %% Start the twitter process
      case start_following(Context) of
          not_configured ->
              %% The observer is deliberately not registered on this path:
              %% returning ignore ends this process, and a registration made
              %% earlier would be left pointing at a dead pid.
              z_session_manager:broadcast(#broadcast{type="error", message="No configuration (mod_twitter.api_login / mod_twitter.api_password) found, not starting.", title="Twitter", stay=true}, z_acl:sudo(Context)),
              ignore;
          TwitterPid when is_pid(TwitterPid); TwitterPid =:= undefined ->
              %% undefined means: configured, but nobody to follow yet.
              z_notifier:observe(restart_twitter, self(), Context),
              {ok, #state{context=z_context:new(Context), twitter_pid=TwitterPid}}
      end.
  %% @spec handle_call(Request, From, State) -> {reply, Reply, State} |
  %% {reply, Reply, State, Timeout} |
  %% {noreply, State} |
  %% {noreply, State, Timeout} |
  %% {stop, Reason, Reply, State} |
  %% {stop, Reason, State}
  %% @doc Trap unknown calls: this server defines no synchronous API, so any
  %% call stops it with reason {unknown_call, Message}.
  handle_call(Message, _From, State) ->
      StopReason = {unknown_call, Message},
      {stop, StopReason, State}.
  %% @doc Handle casts. {restart_twitter, _} restarts the fetch process so
  %% that it picks up a changed list of followed ids; any other cast stops
  %% the server with reason {unknown_cast, Message}.
  handle_cast({restart_twitter, _Context}, #state{twitter_pid=Pid}=State) when is_pid(Pid) ->
      %% Exit the process; will be started again by the 'EXIT' clause
      %% (reason restarting) in handle_info/2.
      erlang:exit(Pid, restarting),
      {noreply, State#state{twitter_pid=undefined}};
  handle_cast({restart_twitter, _Context}, #state{context=Context}=State) ->
      %% Not running. start_following/1 may return undefined or
      %% not_configured instead of a pid; store only a real pid so that a
      %% later restart never calls erlang:exit/2 on an atom.
      NewPid = case start_following(Context) of
          Started when is_pid(Started) -> Started;
          _NotStarted -> undefined
      end,
      {noreply, State#state{twitter_pid=NewPid}};
  handle_cast(Message, State) ->
      {stop, {unknown_cast, Message}, State}.
  %% @spec handle_info(Info, State) -> {noreply, State} |
  %% {noreply, State, Timeout} |
  %% {stop, Reason, State}
  %% @doc Handling all non call/cast messages.
  %% An 'EXIT' with reason restarting restarts the fetch process after 500 ms;
  %% an 'EXIT' with reason {error, _} restarts it after 15 s. All other
  %% messages are ignored.
  handle_info({'EXIT', _Pid, restarting}, State) ->
      {noreply, restart_following_after(500, State)};
  handle_info({'EXIT', _Pid, {error, _Reason}}, State) ->
      {noreply, restart_following_after(15000, State)};
  handle_info(_Info, State) ->
      {noreply, State}.

  %% Sleep Delay milliseconds (this blocks the server for that time), start a
  %% new fetch process and return the updated state. start_following/1 can
  %% return undefined or not_configured instead of a pid; only a real pid is
  %% stored in twitter_pid.
  restart_following_after(Delay, #state{context=Context}=State) ->
      timer:sleep(Delay),
      TwitterPid = case start_following(Context) of
          Pid when is_pid(Pid) -> Pid;
          _NotStarted -> undefined
      end,
      State#state{twitter_pid=TwitterPid}.
  %% @spec terminate(Reason, State) -> void()
  %% @doc This function is called by a gen_server when it is about to
  %% terminate. It should be the opposite of Module:init/1 and do any necessary
  %% cleaning up. When it returns, the gen_server terminates with Reason.
  %% The return value is ignored.
  %% Here it removes the restart_twitter observer that init/1 registered.
  terminate(_Reason, State) ->
      z_notifier:detach(restart_twitter, self(), State#state.context),
      ok.
  %% @spec code_change(OldVsn, State, Extra) -> {ok, NewState}
  %% @doc Convert process state when code is changed. No conversion is
  %% needed: the state is handed back untouched.
  code_change(_FromVersion, CurrentState, _Extra) ->
      {ok, CurrentState}.
  154. %%====================================================================
  155. %% Internal functions
  156. %%====================================================================
  %% @doc Start the streaming fetch process for all twitter ids stored as
  %% identities (at most 400).
  %% Returns the pid of the linked fetch process, undefined when there is
  %% nobody to follow, or not_configured when the login or the password is
  %% missing from the module configuration.
  start_following(Context) ->
      Login = twitter_credential(api_login, Context),
      Pass = twitter_credential(api_password, Context),
      %% Both values are spliced into the URL below, so both must be strings;
      %% a missing password is treated the same as a missing login.
      case is_list(Login) andalso is_list(Pass) of
          false ->
              error_logger:info_msg("No username/password configuration for mod_twitter. ~n"),
              not_configured;
          true ->
              %% Get list of twitter ids to follow
              Follow1 = [binary_to_list(V) || {V} <- z_db:q("SELECT key FROM identity WHERE type = 'twitter_id' LIMIT 400", Context)],
              case Follow1 of
                  [] ->
                      error_logger:info_msg("No follow configuration for mod_twitter. ~n"),
                      undefined;
                  _ ->
                      URL = "http://" ++ Login ++ ":" ++ Pass ++ "@stream.twitter.com/1/statuses/filter.json",
                      Follow = z_utils:combine(",", Follow1),
                      Body = lists:flatten("follow=" ++ Follow),
                      z_session_manager:broadcast(#broadcast{type="notice", message="Now waiting for tweets to arrive...", title="Twitter", stay=false}, Context),
                      spawn_link(?MODULE, fetch, [URL, Body, 5, Context])
              end
      end.

  %% Read one mod_twitter config key; binaries are converted to strings, any
  %% other value (including the default false for a missing key) is
  %% returned as is.
  twitter_credential(Key, Context) ->
      case m_config:get_value(?MODULE, Key, false, Context) of
          Bin when is_binary(Bin) -> binary_to_list(Bin);
          Other -> Other
      end.
  %%
  %% Main fetch process
  %%
  %% Posts Body to URL as an asynchronous, streaming request and hands the
  %% request id to receive_chunk/2. Sleeps Sleep seconds before every retry.
  %% Exits with {error, Reason} when receive_chunk/2 returns a non-timeout
  %% error.
  fetch(URL, Body, Sleep, Context) ->
      Request = {URL, [], "application/x-www-form-urlencoded", Body},
      Options = [{sync, false},
                 {stream, self},
                 {verbose, trace}],
      case http:request(post, Request, [], Options) of
          {ok, RequestId} ->
              case receive_chunk(RequestId, Context) of
                  {ok, _} ->
                      % stream broke normally retry
                      timer:sleep(Sleep * 1000),
                      fetch(URL, Body, Sleep, Context);
                  {error, timeout} ->
                      error_logger:info_msg("Timeout ~n"),
                      timer:sleep(Sleep * 1000),
                      fetch(URL, Body, Sleep, Context);
                  {error, Reason} ->
                      error_logger:error_msg("Error ~p ~n", [Reason]),
                      timer:sleep(Sleep * 1000),
                      exit({error, Reason})
              end;
          RequestFailure ->
              %% The request could not be issued at all: log and try again.
              error_logger:error_msg("Error ~p ~n", [RequestFailure]),
              timer:sleep(Sleep * 1000),
              fetch(URL, Body, Sleep, Context)
      end.
  %
  % This is the tweet handler. A chunk that starts with `{' is decoded as
  % JSON and imported in a separate process; any other chunk is ignored
  % and ok is returned.
  %
  process_data(<<${, _/binary>> = Data, Context) ->
      {struct, Tweet} = mochijson:decode(Data),
      AsyncContext = z_context:prune_for_async(Context),
      %% Deliberately not linked: a crash while importing a single tweet
      %% must not take down the process that reads the stream.
      spawn(fun() -> import_tweet(Tweet, AsyncContext) end);
  process_data(_Data, _Context) ->
      ok.

  %% Store one decoded tweet as a resource, connect it to its author and
  %% import the pictures it links to. Only the pruned AsyncContext (and the
  %% sudo context derived from it) is used here, so the original request
  %% context is not captured by the worker, and the picture import runs with
  %% the same permissions as the tweet insert.
  import_tweet(Tweet, AsyncContext) ->
      {struct, User} = proplists:get_value("user", Tweet),
      TweeterId = proplists:get_value("id", User),
      case m_identity:lookup_by_type_and_key("twitter_id", TweeterId, AsyncContext) of
          undefined ->
              ?DEBUG("Unknown user..."),
              z_session_manager:broadcast(#broadcast{type="error", message="Received a tweet for an unknown user.", title="Unknown user", stay=false}, AsyncContext);
          Row ->
              UserId = proplists:get_value(rsc_id, Row),
              CategoryId = m_category:name_to_id_check(tweet, AsyncContext),
              Props = [{title, proplists:get_value("screen_name", User) ++ " tweeted on " ++ proplists:get_value("created_at", Tweet)},
                       {body, proplists:get_value("text", Tweet)},
                       {source, proplists:get_value("source", Tweet)},
                       {category_id, CategoryId},
                       {tweet, Tweet},
                       {is_published, true}],
              AdminContext = z_acl:sudo(AsyncContext),
              %% Create rsc
              {ok, TweetId} = m_rsc:insert(Props, AdminContext),
              %% Create edge
              {ok, _} = m_edge:insert(TweetId, author, UserId, AdminContext),
              %% Get images from the tweet and download them.
              Urls = extract_urls(Tweet),
              Ids = check_import_pictures(Urls, AdminContext),
              %% Create edges
              [{ok, _} = m_edge:insert(TweetId, depiction, PictureId, AdminContext) || PictureId <- Ids],
              Message = proplists:get_value("screen_name", User) ++ ": " ++ proplists:get_value("text", Tweet),
              z_session_manager:broadcast(#broadcast{type="notice", message=Message, title="New tweet!", stay=false}, AdminContext),
              TweetId
      end.
  %%
  %% Process a chunk of http data
  %%
  %% Waits for the next message belonging to RequestId. Stream messages are
  %% handled and the loop continues; stream_end returns {ok, RequestId}.
  %% Any non-streaming result, or 120 seconds of silence, makes the calling
  %% process exit with {error, Reason}.
  receive_chunk(RequestId, Context) ->
      receive
          %% streaming chunk of data
          %% this is where we will be looping around,
          %% the chunk is handed to process_data/2, which does its work in a
          %% separate process, and we go back to receiving the tweets
          {http, {RequestId, stream, Chunk}} ->
              process_data(Chunk, Context),
              ?MODULE:receive_chunk(RequestId, Context);
          %% start of streaming data
          {http, {RequestId, stream_start, StartHeaders}} ->
              error_logger:info_msg("Streaming data start ~p ~n",[StartHeaders]),
              ?MODULE:receive_chunk(RequestId, Context);
          %% end of streaming data
          {http, {RequestId, stream_end, EndHeaders}} ->
              error_logger:info_msg("Streaming data end ~p ~n", [EndHeaders]),
              {ok, RequestId};
          %% transport level timeout
          {http, {RequestId, {error, Why}}} when Why =:= etimedout; Why =:= timeout ->
              exit({error, timeout});
          %% credentials were rejected
          {http, {RequestId, {{_, 401, _} = Status, Headers, _}}} ->
              z_session_manager:broadcast(#broadcast{type="error", message="Twitter says the username/password is unauthorized.", title="Twitter module", stay=false}, z_acl:sudo(Context)),
              exit({error, {unauthorized, {Status, Headers}}});
          %% any other complete (non-streamed) result
          {http, {RequestId, Result}} ->
              exit({error, Result})
      after 120000 ->
          %% Timeout; respawn.
          exit({error, timeout})
      end.
  %%
  %% @doc The datamodel that is used in this module, installed before the module is started.
  %% It declares the `tweet' category (under `text') and the `from_twitter'
  %% keyword resource.
  %%
  datamodel() ->
      Categories = [{tweet, text, [{title, <<"Tweet">>}]}],
      Resources = [{from_twitter, keyword, [{title, <<"From Twitter">>}]}],
      [{categories, Categories},
       {resources, Resources}].
  %% handle_author_edges_upgrade(Context)
  %% @doc Upgrade person->tweeted->tweet edges to tweet->author->person.
  %% Returns ok when the old `tweeted' predicate was found and converted,
  %% nop otherwise.
  handle_author_edges_upgrade(C) ->
      SudoContext = z_acl:sudo(C),
      case m_rsc:name_to_id_cat(tweeted, predicate, SudoContext) of
          {ok, TweetedId} ->
              ?DEBUG("Found old 'tweeted' predicate, upgrading..."),
              AuthorId = m_rsc:name_to_id_cat_check(author, predicate, SudoContext),
              %% Swap subject and object and switch the predicate in one statement.
              Sql = "update edge set subject_id = object_id, object_id = subject_id, predicate_id = $1 where predicate_id = $2",
              z_db:q(Sql, [AuthorId, TweetedId], SudoContext),
              %% The old predicate is no longer referenced; remove it.
              m_rsc:delete(TweetedId, SudoContext),
              ok;
          _NotFound ->
              nop
      end.
  %% @doc Return the "url" value of every entry in the tweet's
  %% entities/urls array, in order. Tweet is the proplist of a decoded
  %% mochijson struct. A tweet without an "entities" key, or whose entities
  %% have no "urls" key, yields [] instead of crashing.
  extract_urls(Tweet) ->
      case proplists:get_value("entities", Tweet) of
          undefined ->
              [];
          {struct, Entities} ->
              case proplists:get_value("urls", Entities) of
                  undefined ->
                      [];
                  {array, Urls} ->
                      [proplists:get_value("url", UO) || {struct, UO} <- Urls]
              end
      end.
  %% @doc Ask embed.ly for oEmbed information on all Urls, import every
  %% result that import_oembed/3 accepts, connect each imported resource to
  %% the `from_twitter' keyword with a `subject' edge and return the list of
  %% imported resource ids. An empty url list gives [] without any request.
  check_import_pictures([], _Context) ->
      [];
  check_import_pictures(Urls, Context) ->
      %% Get oEmbed info on all Urls
      EncodedUrls = [z_utils:url_encode(Url) || Url <- Urls],
      EmbedlyUrl = "http://api.embed.ly/1/oembed?urls=" ++ string:join(EncodedUrls, ","),
      {ok, {{_Version, 200, _ReasonPhrase}, _Headers, Body}} = http:request(EmbedlyUrl),
      {array, Pictures} = mochijson:decode(Body),
      Infos = [Info || {struct, Info} <- Pictures],
      %% Pair every url with its oEmbed answer (the lists must be equally long).
      Pairs = lists:zip(Urls, Infos),
      %% Import pictures
      Imported = [import_oembed(Url, Info, Context) || {Url, Info} <- Pairs],
      Ids = [Id || Id <- Imported, not z_utils:is_empty(Id)],
      %% Give 'em edges to the 'from twitter' keyword
      lists:foreach(
          fun(Id) ->
              {ok, _} = m_edge:insert(Id, subject, m_rsc:rid(from_twitter, Context), Context)
          end,
          Ids),
      Ids.
  %% @doc Import oEmbed-compatible proplist as a rsc. Returns undefined when
  %% the oEmbed type has no matching category, otherwise the id of the
  %% newly inserted media resource.
  %% @spec import_oembed(Url, Props, Context) -> undefined | int()
  import_oembed(OriginalUrl, Props, Context) ->
      OembedType = proplists:get_value("type", Props),
      import_oembed_as(oembed_category(OembedType), OriginalUrl, Props, Context).

  %% Insert the media found at the oEmbed "url" under the given category;
  %% the tweeted url is kept as the resource's website.
  import_oembed_as(undefined, _OriginalUrl, _Props, _Context) ->
      undefined;
  import_oembed_as(Category, OriginalUrl, Props, Context) ->
      MediaUrl = proplists:get_value("url", Props),
      RscProps = [{category, Category},
                  {title, proplists:get_value("title", Props)},
                  {summary, proplists:get_value("description", Props)},
                  {website, OriginalUrl},
                  {oembed, Props}],
      {ok, Id} = m_media:insert_url(MediaUrl, RscProps, Context),
      Id.
  %% @doc Mapping from oEmbed category to Zotonic category. undefined means: do not import.
  %% "photo" is the oEmbed type; "image" is not standard oEmbed, but
  %% returned by yfrog.
  oembed_category(Type) when Type =:= "photo"; Type =:= "image" ->
      image;
  oembed_category(_Other) ->
      undefined.
  361. %% test() ->
  362. %% Tweet = [{"entities",
  363. %% {struct,
  364. %% [{"urls",
  365. %% {array,
  366. %% [{struct,
  367. %% [{"indices",{array,[4,29]}},
  368. %% {"url","http://twitpic.com/441ivo"},
  369. %% {"expanded_url",null}]},
  370. %% {struct,
  371. %% [{"indices",{array,"$="}},
  372. %% {"url","http://twitpic.com/4801nb"},
  373. %% {"expanded_url",null}]}]}},
  374. %% {"hashtags",{array,[]}},
  375. %% {"user_mentions",{array,[]}}]}}],
  376. %% ["http://twitpic.com/441ivo", "http://twitpic.com/4801nb"] = extract_urls(Tweet),
  377. %% C = z_acl:sudo(z:c(scherpenisse)),
  378. %% check_import_pictures(extract_urls(Tweet), C).