PageRenderTime 34ms CodeModel.GetById 2ms app.highlight 27ms RepoModel.GetById 1ms app.codeStats 1ms

/modules/mod_twitter/mod_twitter.erl

https://code.google.com/p/zotonic/
Erlang | 448 lines | 262 code | 70 blank | 116 comment | 11 complexity | b9b3d88504d945cc1a38c22b522e8e8a MD5 | raw file
  1%% @author Arjan Scherpenisse <arjan@scherpenisse.net>
  2%% @copyright 2009 Arjan Scherpenisse
  3%% Date: 2009-12-10
  4%% @doc Follow users on Twitter using the streaming HTTP API.
  5%%
  6%% Setup instructions:
  7%% * Enable the mod_twitter module
  8%% * Put your login/password in the config keys mod_twitter.api_login
  9%%   and mod_twitter.api_password, respectively.
 10%% * Create a person in the Zotonic database, find a twitter ID on
 11%%   twitter, and put it in the person record on the admin edit page
 12%%   (sidebar)
 13%% * The module will start automatically to follow the users which have a twitter id set.
 14
 15%% Copyright 2009 Arjan Scherpenisse
 16%%
 17%% Licensed under the Apache License, Version 2.0 (the "License");
 18%% you may not use this file except in compliance with the License.
 19%% You may obtain a copy of the License at
 20%% 
 21%%     http://www.apache.org/licenses/LICENSE-2.0
 22%% 
 23%% Unless required by applicable law or agreed to in writing, software
 24%% distributed under the License is distributed on an "AS IS" BASIS,
 25%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 26%% See the License for the specific language governing permissions and
 27%% limitations under the License.
 28
 29-module(mod_twitter).
 30-author("Arjan Scherpenisse <arjan@scherpenisse.net>").
 31-behaviour(gen_server).
 32
 33-mod_title("Twitter").
 34-mod_description("Follow persons from Zotonic on Twitter using the streaming API.").
 35-mod_prio(200).
 36
 37%% gen_server exports
 38-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).
 39-export([start_link/1]).
 40
 41%% interface functions
 42-export([
 43         datamodel/0,
 44         fetch/4, 
 45         observe_rsc_update_done/2,
 46         receive_chunk/2
 47]).
 48
 49-include_lib("zotonic.hrl").
 50
 51-record(state, {context, twitter_pid=undefined}).
 52
 53
 54observe_rsc_update_done(#rsc_update_done{id=Id}, Context) ->
 55    case m_rsc:p(Id, twitter_id, Context) of
 56        undefined ->
 57            ok;
 58        TwitterId ->
 59
 60            NonEmptyNewId = case TwitterId of
 61                                X when X =:= [] orelse X =:= <<>> orelse X =:= undefined -> false;
 62                                _ -> true
 63                            end,
 64            Restart = case m_identity:get_rsc(Id, twitter_id, Context) of
 65                          L when is_list(L) ->
 66                              case proplists:get_value(key, L) of
 67                                  TwitterId ->
 68                                      %% not changed
 69                                      false;
 70                                  _ ->
 71                                      m_identity:delete(proplists:get_value(id, L), Context),
 72                                      true
 73                              end;
 74                          _ -> NonEmptyNewId
 75                      end,
 76            case NonEmptyNewId of
 77                true -> m_identity:insert(Id, twitter_id, TwitterId, Context);
 78                _    -> ignore
 79            end,
 80            case Restart of
 81                true  -> 
 82                    z_notifier:notify(restart_twitter, Context);
 83                false -> ok
 84            end
 85    end.
 86
 87
 88%%====================================================================
 89%% API
 90%%====================================================================
 91%% @spec start_link(Args) -> {ok,Pid} | ignore | {error,Error}
 92%% @doc Starts the server
 93start_link(Args) when is_list(Args) ->
 94    gen_server:start_link(?MODULE, Args, []).
 95
 96%%====================================================================
 97%% gen_server callbacks
 98%%====================================================================
 99
%% @spec init(Args) -> {ok, State} |
%%                     {ok, State, Timeout} |
%%                     ignore               |
%%                     {stop, Reason}
%% @doc Initialise the server: trap exits of the linked fetch process, run the
%% edge upgrade, subscribe to restart_twitter and try to start following.
%% Returns ignore (after broadcasting an error) when no credentials are configured.
init(Args) ->
    process_flag(trap_exit, true),
    {context, Context} = proplists:lookup(context, Args),
    handle_author_edges_upgrade(Context),

    z_notifier:observe(restart_twitter, self(), Context),

    %% Start the twitter process
    Started = start_following(Context),
    case Started of
        not_configured ->
            Notice = #broadcast{type="error", message="No configuration (mod_twitter.api_login / mod_twitter.api_password) found, not starting.", title="Twitter", stay=true},
            z_session_manager:broadcast(Notice, z_acl:sudo(Context)),
            ignore;
        undefined ->
            {ok, #state{context=z_context:new(Context)}};
        FetchPid when is_pid(FetchPid) ->
            {ok, #state{context=z_context:new(Context), twitter_pid=FetchPid}}
    end.
122
123
%% @spec handle_call(Request, From, State) -> {reply, Reply, State} |
%%                                      {reply, Reply, State, Timeout} |
%%                                      {noreply, State} |
%%                                      {noreply, State, Timeout} |
%%                                      {stop, Reason, Reply, State} |
%%                                      {stop, Reason, State}
%% @doc This server defines no synchronous calls; any call stops the server
%% with reason {unknown_call, Request}.
handle_call(Unexpected, _Caller, ServerState) ->
    StopReason = {unknown_call, Unexpected},
    {stop, StopReason, ServerState}.
133
%% @spec handle_cast(Msg, State) -> {noreply, State} | {stop, Reason, State}
%% @doc Handle the restart_twitter notification; any other cast stops the server.
%%
%% When a fetch process is running it is killed with reason 'restarting'; the
%% resulting 'EXIT' message is what starts the replacement. When none is
%% running, one is started right away.
handle_cast({restart_twitter, _Context}, #state{twitter_pid=Pid}=State) when is_pid(Pid) ->
    %% Exit the process; will be started again.
    erlang:exit(Pid, restarting),
    {noreply, State#state{twitter_pid=undefined}};
handle_cast({restart_twitter, _Context}, #state{context=Context}=State) ->
    %% Not running. twitter_pid may hold a non-pid marker left by an earlier
    %% failed start, so anything that is not a pid is treated as "not running"
    %% instead of being passed to erlang:exit/2 (which would raise badarg).
    NewPid = case start_following(Context) of
                 Started when is_pid(Started) -> Started;
                 _NotStarted -> undefined
             end,
    {noreply, State#state{twitter_pid=NewPid}};

handle_cast(Message, State) ->
    {stop, {unknown_cast, Message}, State}.
148
149
150
%% @spec handle_info(Info, State) -> {noreply, State} |
%%                                       {noreply, State, Timeout} |
%%                                       {stop, Reason, State}
%% @doc Handling all non call/cast messages.
%%
%% An 'EXIT' with reason 'restarting' (requested restart) restarts the fetch
%% process after 500 ms; an 'EXIT' with {error, Reason} restarts it after
%% 15 seconds. All other messages are ignored.
handle_info({'EXIT', _Pid, restarting}, State) ->
    {noreply, restart_following_after(500, State)};

handle_info({'EXIT', _Pid, {error, _Reason}}, State) ->
    {noreply, restart_following_after(15000, State)};

handle_info(_Info, State) ->
    {noreply, State}.

%% Wait Delay milliseconds (this blocks the server, as before) and start
%% following again. Only a real pid is stored in the state: start_following/1
%% can also return 'undefined' or 'not_configured', and storing such an atom
%% as twitter_pid would make a later erlang:exit(Pid, restarting) fail.
restart_following_after(Delay, #state{context=Context}=State) ->
    timer:sleep(Delay),
    TwitterPid = case start_following(Context) of
                     Pid when is_pid(Pid) -> Pid;
                     _NotStarted -> undefined
                 end,
    State#state{twitter_pid=TwitterPid}.
167
168
%% @spec terminate(Reason, State) -> void()
%% @doc This function is called by a gen_server when it is about to
%% terminate. It is the opposite of init/1: the restart_twitter subscription
%% made there with z_notifier:observe/3 is removed again, so the notifier does
%% not keep a reference to this (dying) process.
%% The return value is ignored.
terminate(_Reason, State) ->
    z_notifier:detach(restart_twitter, self(), State#state.context),
    ok.
177
178
%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState}
%% @doc Code upgrade hook; the state needs no conversion and is returned as is.
code_change(_FromVersion, CurrentState, _UpgradeExtra) ->
    {ok, CurrentState}.
184
185
186%%====================================================================
187%% Internal functions
188%%====================================================================
189
%% @doc Start a linked fetch process that follows all stored twitter ids.
%%
%% Returns:
%%   not_configured - api_login and/or api_password is not set;
%%   undefined      - there are no 'twitter_id' identities to follow;
%%   Pid            - the pid of the spawned (linked) fetch/4 process.
start_following(Context) ->
    Login = twitter_config_value(api_login, Context),
    Pass  = twitter_config_value(api_password, Context),
    %% Both values are concatenated into the URL below; a missing password
    %% (false) would otherwise crash there, so it counts as "not configured"
    %% just like a missing login.
    case Login =:= false orelse Pass =:= false of
        true ->
            error_logger:info_msg("No username/password configuration for mod_twitter. ~n"),
            not_configured;
        false ->
            %% Get list of twitter ids to follow
            Follow1 = [binary_to_list(V) || {V} <- z_db:q("SELECT key FROM identity WHERE type = 'twitter_id' LIMIT 400", Context)],

            case Follow1 of
                [] ->
                    error_logger:info_msg("No follow configuration for mod_twitter. ~n"),
                    undefined;
                _ ->
                    URL = "http://" ++ Login ++ ":" ++ Pass ++ "@stream.twitter.com/1/statuses/filter.json",
                    Follow = z_utils:combine(",", Follow1),
                    Body = lists:flatten("follow=" ++ Follow),
                    z_session_manager:broadcast(#broadcast{type="notice", message="Now waiting for tweets to arrive...", title="Twitter", stay=false}, Context),
                    spawn_link(?MODULE, fetch, [URL, Body, 5, Context])
            end
    end.

%% Read a mod_twitter config key; binaries are converted to strings and
%% 'false' is returned when the key has no value.
twitter_config_value(Key, Context) ->
    case m_config:get_value(?MODULE, Key, false, Context) of
        Bin when is_binary(Bin) -> binary_to_list(Bin);
        Other -> Other
    end.
221
222
%%
%% Main fetch process.
%%
%% Posts Body to URL as an asynchronous, streaming request and hands the
%% stream to receive_chunk/2. After the stream ends, after a timeout result or
%% when the request could not be started, it sleeps Sleep seconds and tries
%% again; on any other error result it sleeps and exits with {error, Reason}.
%%
fetch(URL, Body, Sleep, Context) ->
    Pause = fun() -> timer:sleep(Sleep * 1000) end,
    Request = {URL, [], "application/x-www-form-urlencoded", Body},
    Options = [{sync, false},
               {stream, self},
               {verbose, trace}],
    case http:request(post, Request, [], Options) of
        {ok, RequestId} ->
            StreamResult = receive_chunk(RequestId, Context),
            case StreamResult of
                {ok, _} ->
                    %% stream broke normally, retry
                    Pause(),
                    fetch(URL, Body, Sleep, Context);
                {error, timeout} ->
                    error_logger:info_msg("Timeout ~n"),
                    Pause(),
                    fetch(URL, Body, Sleep, Context);
                {error, Reason} ->
                    error_logger:error_msg("Error ~p ~n", [Reason]),
                    Pause(),
                    exit({error, Reason})
            end;
        RequestFailure ->
            error_logger:error_msg("Error ~p ~n", [RequestFailure]),
            Pause(),
            fetch(URL, Body, Sleep, Context)
    end.
253
%%
%% @doc Tweet handler for one chunk of streamed data.
%%
%% Data starting with "{" is decoded as a JSON object and imported in a
%% separate process, so the caller can go straight back to receiving; the pid
%% of that process is returned. Any other data (e.g. keep-alive newlines) is
%% ignored and 'ok' is returned.
%%
process_data(<<${, _/binary>> = Data, Context) ->
    {struct, Tweet} = mochijson:decode(Data),
    AsyncContext = z_context:prune_for_async(Context),
    %% Deliberately not linked: a failing import must not take the streaming
    %% process down with it. Only the pruned context is captured by the fun.
    spawn(fun() -> import_tweet(Tweet, AsyncContext) end);
process_data(_Data, _Context) ->
    ok.

%% Import a decoded tweet: look up the person by twitter id, create the tweet
%% resource with an author edge, import linked pictures as depictions and
%% broadcast a notice. Returns the id of the new tweet resource, or the result
%% of the broadcast when the tweeting user is unknown.
import_tweet(Tweet, AsyncContext) ->
    {struct, User} = proplists:get_value("user", Tweet),
    TweeterId = proplists:get_value("id", User),
    case m_identity:lookup_by_type_and_key("twitter_id", TweeterId, AsyncContext) of
        undefined ->
            ?DEBUG("Unknown user..."),
            z_session_manager:broadcast(#broadcast{type="error", message="Received a tweet for an unknown user.", title="Unknown user", stay=false}, AsyncContext);
        Row ->
            UserId = proplists:get_value(rsc_id, Row),
            ScreenName = proplists:get_value("screen_name", User),
            Text = proplists:get_value("text", Tweet),
            CategoryId = m_category:name_to_id_check(tweet, AsyncContext),
            Props = [{title, ScreenName ++ " tweeted on " ++ proplists:get_value("created_at", Tweet)},
                     {body, Text},
                     {source, proplists:get_value("source", Tweet)},
                     {category_id, CategoryId},
                     {tweet, Tweet},
                     {is_published, true}],

            %% All writes below use the same sudo context: the tweet, its
            %% pictures and the edges between them are created on behalf of
            %% the module, not of a logged-in user.
            AdminContext = z_acl:sudo(AsyncContext),
            %% Create rsc
            {ok, TweetId} = m_rsc:insert(Props, AdminContext),

            %% Create edge
            {ok, _} = m_edge:insert(TweetId, author, UserId, AdminContext),

            %% Get images from the tweet and download them.
            Urls = extract_urls(Tweet),
            Ids = check_import_pictures(Urls, AdminContext),
            %% Create edges
            lists:foreach(fun(PictureId) ->
                                  {ok, _} = m_edge:insert(TweetId, depiction, PictureId, AdminContext)
                          end, Ids),

            Message = ScreenName ++ ": " ++ Text,
            z_session_manager:broadcast(#broadcast{type="notice", message=Message, title="New tweet!", stay=false}, AdminContext),
            TweetId
    end.
302
303
%%
%% @doc Process the http messages of one streaming request.
%%
%% Loops (through a fully qualified call) over stream_start and stream
%% messages, passing every data chunk to process_data/2, and returns
%% {ok, RequestId} on stream_end. Exits with {error, timeout} on a timeout
%% error or when nothing arrives for two minutes, with
%% {error, {unauthorized, ...}} on a 401 response, and with {error, Result}
%% for any other non-streaming result.
%%
receive_chunk(RequestId, Context) ->
    IdleTimeout = 120 * 1000,
    receive
        {http, {RequestId, {error, Why}}} when Why =:= etimedout; Why =:= timeout ->
            exit({error, timeout});
        {http, {RequestId, {{_, 401, _} = StatusLine, ResponseHeaders, _}}} ->
            Notice = #broadcast{type="error", message="Twitter says the username/password is unauthorized.", title="Twitter module", stay=false},
            z_session_manager:broadcast(Notice, z_acl:sudo(Context)),
            exit({error, {unauthorized, {StatusLine, ResponseHeaders}}});
        {http, {RequestId, OtherResult}} ->
            exit({error, OtherResult});

        %% start of streaming data
        {http, {RequestId, stream_start, StartHeaders}} ->
            error_logger:info_msg("Streaming data start ~p ~n",[StartHeaders]),
            ?MODULE:receive_chunk(RequestId, Context);

        %% streaming chunk of data
        %% This is the main loop: the chunk is handed off for processing in a
        %% separate process and we go straight back to receiving tweets.
        {http, {RequestId, stream, Chunk}} ->
            process_data(Chunk, Context),
            ?MODULE:receive_chunk(RequestId, Context);

        %% end of streaming data
        {http, {RequestId, stream_end, EndHeaders}} ->
            error_logger:info_msg("Streaming data end ~p ~n", [EndHeaders]),
            {ok, RequestId}

    after IdleTimeout ->
            %% Timeout; respawn.
            exit({error, timeout})
    end.
338
339
340
%%
%% @doc The datamodel that is used in this module, installed before the module is started.
%% It declares the 'tweet' category (under 'text') and the 'from_twitter'
%% keyword resource.
%%
datamodel() ->
    TweetCategory = {tweet, text, [{title, <<"Tweet">>}]},
    FromTwitterKeyword = {from_twitter, keyword, [{title, <<"From Twitter">>}]},
    [{categories, [TweetCategory]},
     {resources, [FromTwitterKeyword]}].
359
360
361
%% handle_author_edges_upgrade(Context)
%% @doc Upgrade person->tweeted->tweet edges to tweet->author->person.
%% When the old 'tweeted' predicate still exists, all its edges are reversed
%% and moved to the 'author' predicate, after which the old predicate is
%% deleted. Returns ok after an upgrade and nop when there was nothing to do.
handle_author_edges_upgrade(C) ->
    Context = z_acl:sudo(C),
    OldPredicate = m_rsc:name_to_id_cat(tweeted, predicate, Context),
    case OldPredicate of
        {ok, Tweeted} ->
            ?DEBUG("Found old 'tweeted' predicate, upgrading..."),
            Author = m_rsc:name_to_id_cat_check(author, predicate, Context),
            Sql = "update edge set subject_id = object_id, object_id = subject_id, predicate_id = $1 where predicate_id = $2",
            z_db:q(Sql, [Author, Tweeted], Context),
            m_rsc:delete(Tweeted, Context),
            ok;
        _NotFound ->
            nop
    end.
376
377
%% @doc Return the list of URLs mentioned in a decoded tweet (mochijson
%% proplist), taken from entities.urls[].url.
%% Tweets without an "entities" object or without a "urls" array yield [],
%% and url objects that carry no "url" value are skipped.
extract_urls(Tweet) ->
    urls_from_entities(proplists:get_value("entities", Tweet)).

%% Pick the "urls" array out of the entities object, if there is one.
urls_from_entities({struct, Entities}) ->
    urls_from_array(proplists:get_value("urls", Entities));
urls_from_entities(_NoEntities) ->
    [].

%% Collect the "url" value of every url object in the array.
urls_from_array({array, UrlObjects}) ->
    [Url || {struct, UrlObject} <- UrlObjects,
            Url <- [proplists:get_value("url", UrlObject)],
            Url =/= undefined];
urls_from_array(_NoUrls) ->
    [].
382    
383
384
%% @doc Ask embed.ly for oEmbed information on all Urls, import every result
%% that import_oembed/3 accepts, connect each imported resource to the
%% 'from_twitter' keyword with a 'subject' edge and return the imported ids.
check_import_pictures([], _Context) ->
    [];
check_import_pictures(Urls, Context) ->
    %% Get oEmbed info on all Urls
    EncodedUrls = [z_utils:url_encode(Url) || Url <- Urls],
    EmbedlyUrl = "http://api.embed.ly/1/oembed?urls=" ++ string:join(EncodedUrls, ","),
    {ok, {{_Version, 200, _ReasonPhrase}, _Headers, Body}} =
        http:request(EmbedlyUrl),
    {array, Pictures} = mochijson:decode(Body),

    %% Pair every url with the properties returned for it.
    UrlProps = lists:zip(Urls, [P || {struct, P} <- Pictures]),

    %% Import pictures, keeping only the non-empty results
    Ids = [Id || {Url, PictureProps} <- UrlProps,
                 Id <- [import_oembed(Url, PictureProps, Context)],
                 not(z_utils:is_empty(Id))],

    %% Give 'em edges to the 'from twitter' keyword
    lists:foreach(fun(Id) ->
                          {ok, _} = m_edge:insert(Id, subject, m_rsc:rid(from_twitter, Context), Context)
                  end, Ids),
    Ids.
402
403
404
%% @doc Import an oEmbed-compatible proplist as a media resource. The oEmbed
%% "type" decides the category (see oembed_category/1); types that map to
%% undefined are not imported.
%% @spec import_oembed(Url, Props, Context) -> undefined | int()
import_oembed(OriginalUrl, Props, Context) ->
    Value = fun(Key) -> proplists:get_value(Key, Props) end,
    case oembed_category(Value("type")) of
        undefined ->
            undefined;
        Category ->
            RscProps = [{category, Category},
                        {title, Value("title")},
                        {summary, Value("description")},
                        {website, OriginalUrl},
                        {oembed, Props}],
            {ok, Id} = m_media:insert_url(Value("url"), RscProps, Context),
            Id
    end.
421
422
%% @doc Mapping from oEmbed type to Zotonic category; undefined means: do not import.
%% "photo" is the standard oEmbed type, "image" is non-standard but returned by yfrog.
oembed_category(Type) when Type =:= "photo"; Type =:= "image" ->
    image;
oembed_category(_Unsupported) ->
    undefined.
427
428
429
430%% test() ->
431%%     Tweet = [{"entities",
432%%               {struct,
433%%                [{"urls",
434%%                  {array,
435%%                   [{struct,
436%%                     [{"indices",{array,[4,29]}},
437%%                      {"url","http://twitpic.com/441ivo"},
438%%                      {"expanded_url",null}]},
439%%                    {struct,
440%%                     [{"indices",{array,"$="}},
441%%                      {"url","http://twitpic.com/4801nb"},
442%%                      {"expanded_url",null}]}]}},
443%%                 {"hashtags",{array,[]}},
444%%                 {"user_mentions",{array,[]}}]}}],
445%%     ["http://twitpic.com/441ivo", "http://twitpic.com/4801nb"] = extract_urls(Tweet),
446%%     C = z_acl:sudo(z:c(scherpenisse)),
447%%     check_import_pictures(extract_urls(Tweet), C).
448