PageRenderTime 38ms CodeModel.GetById 25ms app.highlight 10ms RepoModel.GetById 1ms app.codeStats 0ms

/src/main/java/de/jetwick/util/Util.java

https://github.com/neville-agius/Jetwick
Java | 353 lines | 276 code | 43 blank | 34 comment | 26 complexity | df3055b4e92fd93cefd0bb4cb8b9c035 MD5 | raw file
  1/**
  2 * Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
  3 *
  4 * Licensed under the Apache License, Version 2.0 (the "License");
  5 * you may not use this file except in compliance with the License.
  6 * You may obtain a copy of the License at
  7 *
  8 *         http://www.apache.org/licenses/LICENSE-2.0
  9 *
 10 * Unless required by applicable law or agreed to in writing, software
 11 * distributed under the License is distributed on an "AS IS" BASIS,
 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 * See the License for the specific language governing permissions and
 14 * limitations under the License.
 15 */
 16package de.jetwick.util;
 17
 18import com.google.inject.Guice;
 19import com.google.inject.Inject;
 20import com.google.inject.Injector;
 21import com.google.inject.Module;
 22import com.google.inject.Provider;
 23import de.jetwick.config.Configuration;
 24import de.jetwick.config.DefaultModule;
 25import de.jetwick.es.ElasticTweetSearch;
 26import de.jetwick.es.ElasticUserSearch;
 27import de.jetwick.rmi.RMIClient;
 28import de.jetwick.es.JetwickQuery;
 29import de.jetwick.data.JUser;
 30import de.jetwick.es.CreateObjectsInterface;
 31import de.jetwick.es.TweetQuery;
 32import de.jetwick.tw.Credits;
 33import de.jetwick.tw.MyTweetGrabber;
 34import de.jetwick.tw.TwitterSearch;
 35import de.jetwick.tw.queue.QueueThread;
 36import java.util.Arrays;
 37import java.util.LinkedHashSet;
 38import java.util.List;
 39import java.util.Map;
 40import java.util.Set;
 41import java.util.TreeSet;
 42import org.elasticsearch.action.search.SearchResponse;
 43import org.elasticsearch.index.query.xcontent.ExistsFilterBuilder;
 44import org.elasticsearch.index.query.xcontent.FilterBuilders;
 45import org.elasticsearch.search.facet.terms.TermsFacet;
 46import org.slf4j.Logger;
 47import org.slf4j.LoggerFactory;
 48
 49/**
 50 *
 51 * @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
 52 */
 53public class Util {
 54
 55    private static Logger logger = LoggerFactory.getLogger(Util.class);
 56    @Inject
 57    private ElasticUserSearch userSearch;
 58    @Inject
 59    private ElasticTweetSearch tweetSearch;
 60    private int userCounter;
 61    private Configuration config = new Configuration();
 62
 63    public static void main(String[] args) {
 64        Map<String, String> map = Helper.parseArguments(args);
 65
 66        Util util = new Util();
 67        String argStr = "";
 68        if (!Helper.isEmpty(map.get("deleteAll"))) {
 69            util.deleteAll();
 70            return;
 71        }
 72
 73        argStr = map.get("fillFrom");
 74        if (!Helper.isEmpty(argStr)) {
 75            String fromUrl = argStr;
 76            util.fillFrom(fromUrl);
 77            return;
 78        }
 79
 80        argStr = map.get("clearUserTokens");
 81        if (!Helper.isEmpty(argStr)) {
 82            String newUserIndexName = argStr;
 83            util.clearUserTokens(newUserIndexName);
 84            return;
 85        }
 86
 87        if (!Helper.isEmpty(map.get("copyStaticTweets"))) {
 88            util.copyStaticTweets();
 89            return;
 90        }
 91
 92        argStr = map.get("showFollowers");
 93        if (!Helper.isEmpty(argStr)) {
 94            String user = argStr;
 95            util.showFollowers(user);
 96            return;
 97        }
 98
 99        if (!Helper.isEmpty(map.get("optimize"))) {
100            util.optimize();
101            return;
102        }
103
104        // copyUserIndex=newtwindex
105        argStr = map.get("copyUserIndex");
106        if (!Helper.isEmpty(argStr)) {
107            String newIndex = argStr;
108            util.copyUserIndex(newIndex);
109            return;
110        }
111
112        // copyUserIndex=newtwindex
113        argStr = map.get("copyTweetIndex");
114        if (!Helper.isEmpty(argStr)) {
115            String newIndex = argStr;
116            util.copyTweetIndex(newIndex);
117            return;
118        }
119
120        // call after copyIndex:
121        // removeOldTweetIndexAndAlias=newtwindex
122        argStr = map.get("removeIndexAndAddAlias");
123        if (!Helper.isEmpty(argStr)) {
124            String oldIndex = argStr.split(",")[0];
125            String newIndex = argStr.split(",")[1];
126            util.removeIndexAndAddAlias(oldIndex, newIndex);
127            return;
128        }
129    }
130
131    public Util() {
132        Module module = new DefaultModule();
133        Guice.createInjector(module).injectMembers(this);
134    }
135
136    public void deleteAll() {
137        // why don't we need to set? query.setQueryType("simple")
138        userSearch.deleteAll();
139        userSearch.refresh();
140        logger.info("Successfully finished deleteAll");
141    }
142
143    private void copyStaticTweets() {
144        Module module = new DefaultModule();
145        Injector injector = Guice.createInjector(module);
146        Provider<RMIClient> rmiProvider = injector.getProvider(RMIClient.class);
147        Configuration cfg = injector.getInstance(Configuration.class);
148        TwitterSearch twSearch = injector.getInstance(TwitterSearch.class);
149        twSearch.initTwitter4JInstance(cfg.getTwitterSearchCredits().getToken(), cfg.getTwitterSearchCredits().getTokenSecret());
150        ElasticTweetSearch fromUserSearch = new ElasticTweetSearch(injector.getInstance(Configuration.class));
151        JetwickQuery query = new TweetQuery().addFilterQuery(ElasticTweetSearch.UPDATE_DT, "[* TO *]");
152        // TODO set facetlimit to 2000
153        query.addFacetField("user").setSize(0);
154        SearchResponse rsp = fromUserSearch.search(query);
155
156        TermsFacet tf = (TermsFacet) rsp.getFacets().facet("user");
157        logger.info("found: " + tf.entries().size() + " users with the specified criteria");
158        int SLEEP = 30;
159        int counter = 0;
160        for (TermsFacet.Entry tmpUser : tf.entries()) {
161            if (tmpUser.getCount() < 20)
162                break;
163
164            while (twSearch.getRateLimit() <= 3) {
165                try {
166                    logger.info("sleeping " + SLEEP + " seconds to avoid ratelimit violation");
167                    Thread.sleep(1000 * SLEEP);
168                } catch (InterruptedException ex) {
169                    throw new IllegalStateException(ex);
170                }
171            }
172
173            logger.info(counter++ + "> feed pipe from " + tmpUser.getTerm() + " with " + tmpUser.getCount() + " tweets");
174
175            MaxBoundSet boundSet = new MaxBoundSet<String>(0, 0);
176            // try updating can fail so try max 3 times
177            for (int trial = 0; trial < 3; trial++) {
178                MyTweetGrabber grabber = new MyTweetGrabber().setMyBoundSet(boundSet).
179                        init(null, null, tmpUser.getTerm()).setTweetsCount((int) tmpUser.getCount()).
180                        setRmiClient(rmiProvider).setTwitterSearch(twSearch);
181                QueueThread pkg = grabber.queueTweetPackage();
182                Thread t = new Thread(pkg);
183                t.start();
184                try {
185                    t.join();
186                    if (pkg.getException() == null)
187                        break;
188
189                    logger.warn(trial + "> Try again feeding of user " + tmpUser.getTerm() + " for tweet package " + pkg);
190                } catch (InterruptedException ex) {
191                    logger.warn("interrupted", ex);
192                    break;
193                }
194            }
195        }
196
197        // TODO send via RMI
198    }
199
200    public void fillFrom(final String fromUrl) {
201        ElasticTweetSearch fromTweetSearch = new ElasticTweetSearch(fromUrl, null, null);
202        JetwickQuery query = new TweetQuery();
203        long maxPage = 1;
204        int hitsPerPage = 300;
205        Set<JUser> users = new LinkedHashSet<JUser>();
206        Runnable optimizeOnExit = new Runnable() {
207
208            @Override
209            public void run() {
210                userSearch.refresh();
211                logger.info(userCounter + " users pushed to default tweet search from " + fromUrl);
212            }
213        };
214        Runtime.getRuntime().addShutdownHook(new Thread(optimizeOnExit));
215
216        for (int page = 0; page < maxPage; page++) {
217            query.attachPagability(page, hitsPerPage);
218            users.clear();
219
220            SearchResponse rsp;
221            try {
222                rsp = fromTweetSearch.search(users, query);
223            } catch (Exception ex) {
224                logger.warn("Error while searching!", ex);
225                continue;
226            }
227            if (maxPage == 1) {
228                maxPage = rsp.getHits().getTotalHits() / hitsPerPage + 1;
229                logger.info("Paging though query:" + query.toString());
230                logger.info("Set numFound to " + rsp.getHits().getTotalHits());
231            }
232
233            for (JUser user : users) {
234                userSearch.save(user, false);
235            }
236            userCounter += users.size();
237            logger.info("Page " + page + " out of " + maxPage + " hitsPerPage:" + hitsPerPage);
238
239            if (page * hitsPerPage % 100000 == 0) {
240                logger.info("Commit ...");
241                userSearch.refresh();
242            }
243        }
244    }
245
246    public void showFollowers(String user) {
247//        ElasticUserSearch uSearch = createUserSearch();
248//        Set<SolrUser> jetwickUsers = new LinkedHashSet<SolrUser>();
249//        uSearch.search(jetwickUsers, new SolrQuery().setRows(10000));
250        final Set<String> set = new TreeSet<String>();
251//        for (SolrUser u : jetwickUsers) {
252//            set.add(u.getScreenName());
253//        }
254        Credits credits = config.getTwitterSearchCredits();
255        TwitterSearch tw4j = new TwitterSearch().setConsumer(credits.getConsumerKey(), credits.getConsumerSecret());
256        tw4j.initTwitter4JInstance(credits.getToken(), credits.getTokenSecret());
257        tw4j.getFollowers(user, new AnyExecutor<JUser>() {
258
259            @Override
260            public JUser execute(JUser o) {
261//                if (set.contains(o.getScreenName()))
262                set.add(o.getScreenName());
263                return null;
264            }
265        });
266        for (String u : set) {
267            System.out.println(u);
268        }
269    }
270
271    public void optimize() {
272        tweetSearch.optimize();
273    }
274
275    public void copyTweetIndex(String newIndex) {
276        try {
277            logger.info("Old index has totalhits:" + tweetSearch.countAll());
278            if (!tweetSearch.indexExists(newIndex)) {
279                logger.info("New Index '" + newIndex + "' does not exist! create it before copy!");
280                return;
281            }
282
283            ExistsFilterBuilder filter = FilterBuilders.existsFilter(ElasticTweetSearch.UPDATE_DT);
284            logger.info("Now copy from " + tweetSearch.getIndexName() + " to " + newIndex + " with exist filter: " + filter);
285            tweetSearch.mergeIndices(Arrays.asList(tweetSearch.getIndexName()), newIndex,
286                    10000, true, tweetSearch, filter);
287
288            tweetSearch.setIndexName(newIndex);
289            logger.info("New index has totalhits:" + tweetSearch.countAll() + " Now optimize ...");
290            tweetSearch.optimize();
291        } catch (Exception ex) {
292            logger.error("Exception while copyIndex", ex);
293        }
294    }
295
296    public void copyUserIndex(String newIndex) {
297        try {
298            logger.info("Old index has totalhits:" + userSearch.countAll());
299            if (!userSearch.indexExists(newIndex)) {
300                logger.info("New Index '" + newIndex + "' does not exist! create it before copy!");
301                return;
302            }
303
304            logger.info("Now copy from " + userSearch.getIndexName() + " to " + newIndex);
305            userSearch.mergeIndices(Arrays.asList(userSearch.getIndexName()), newIndex,
306                    10000, true, userSearch, null);
307
308            userSearch.setIndexName(newIndex);
309            logger.info("New index has totalhits:" + userSearch.countAll() + " Now optimize ...");
310            userSearch.optimize();
311        } catch (Exception ex) {
312            logger.error("Exception while copyIndex", ex);
313        }
314    }
315
316    public void removeIndexAndAddAlias(String oldIndex, String newIndex) {
317        tweetSearch.deleteIndex(oldIndex);
318        tweetSearch.addIndexAlias(newIndex, oldIndex);
319
320        logger.info("added alias:" + newIndex + " for deleted:" + oldIndex);
321    }
322
323    public void clearUserTokens(String newIndex) {
324        try {
325            logger.info("Old index has totalhits:" + userSearch.countAll());
326            if (!userSearch.indexExists(newIndex)) {
327                logger.info("New Index '" + newIndex + "' does not exist! create it before copy!");
328                return;
329            }
330
331            logger.info("Now copy from " + userSearch.getIndexName() + " to " + newIndex);
332            userSearch.mergeIndices(Arrays.asList(userSearch.getIndexName()), newIndex, 10000, true,
333                    new CreateObjectsInterface<JUser>() {
334
335                        @Override
336                        public List<JUser> collectObjects(SearchResponse rsp) {
337                            List<JUser> users = userSearch.collectObjects(rsp);
338                            for (JUser u : users) {
339                                u.setTwitterToken(null);
340                                u.setTwitterTokenSecret(null);
341                            }
342                            return users;
343                        }
344                    }, null);
345
346            userSearch.setIndexName(newIndex);
347            logger.info("New index has totalhits:" + userSearch.countAll() + " Now optimize ...");
348            userSearch.optimize();
349        } catch (Exception ex) {
350            logger.error("Exception while copyIndex", ex);
351        }
352    }
353}