PageRenderTime 56ms CodeModel.GetById 6ms app.highlight 42ms RepoModel.GetById 1ms app.codeStats 0ms

/strigi-0.7.7/libstreamanalyzer/lib/filelister.cpp

#
C++ | 359 lines | 309 code | 16 blank | 34 comment | 68 complexity | 51cdb953b3d4075e8ccdede7411f1c64 MD5 | raw file
Possible License(s): LGPL-2.0
  1/* This file is part of Strigi Desktop Search
  2 *
  3 * Copyright (C) 2007 Jos van den Oever <jos@vandenoever.info>
  4 * Copyright (C) 2007 Flavio Castelli <flavio.castelli@gmail.com>
  5 *
  6 * This library is free software; you can redistribute it and/or
  7 * modify it under the terms of the GNU Library General Public
  8 * License as published by the Free Software Foundation; either
  9 * version 2 of the License, or (at your option) any later version.
 10 *
 11 * This library is distributed in the hope that it will be useful,
 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 14 * Library General Public License for more details.
 15 *
 16 * You should have received a copy of the GNU Library General Public License
 17 * along with this library; see the file COPYING.LIB.  If not, write to
 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 19 * Boston, MA 02110-1301, USA.
 20 */
 21
 22#include <strigi/filelister.h>
 23#include <strigi/strigiconfig.h>
 24#include <strigi/strigi_thread.h>
 25#include <strigi/analyzerconfiguration.h>
 26#include <set>
 27#include <list>
 28#include <iostream>
 29#include <sys/types.h>
 30#include <sys/stat.h>
 31#include <cstdlib>
 32#include <cstring>
 33#include "stgdirent.h" //dirent replacement (includes native if available)
 34
 35#ifdef HAVE_DIRECT_H
 36#include <direct.h>
 37#endif
 38#include <errno.h>
 39
 40#ifdef _WIN32
 41// windows does not have symbolic links, so stat() is fine
 42#define strigi_lstat stat
 43#else
 44#define strigi_lstat lstat
 45#endif
 46
 47using namespace std;
 48using namespace Strigi;
 49
 50namespace
 51{
 52    /*!
 53    * @param path string containing path to check
 54    * Removes the terminating char to path.
 55    * Under Windows that char is '\', '/' under *nix
 56    */
 57    string fixPath (string path)
 58    {
 59        if ( path.c_str() == NULL || path.length() == 0 )
 60            return "";
 61
 62        string temp(path);
 63
 64    #ifdef HAVE_WINDOWS_H
 65        size_t l= temp.length();
 66        char* t = (char*)temp.c_str();
 67        for (size_t i=0;i<l;i++){
 68            if ( t[i] == '\\' )
 69                t[i] = '/';
 70        }
 71        temp[0] = tolower(temp.at(0));
 72    #endif
 73
 74        char separator = '/';
 75
 76        if (temp[temp.length() - 1 ] == separator)
 77            return temp.substr(0, temp.size() - 1);
 78
 79        return temp;
 80    }
 81}
 82
 83class FileLister::Private {
 84public:
 85    char path[10000];
 86    STRIGI_MUTEX_DEFINE(mutex);
 87    DIR** dirs;
 88    DIR** dirsEnd;
 89    DIR** curDir;
 90    string::size_type* len;
 91    string::size_type* lenEnd;
 92    string::size_type* curLen;
 93    time_t mtime;
 94    struct dirent* subdir;
 95    struct stat dirstat;
 96    set<string> listedDirs;
 97    const AnalyzerConfiguration* const config;
 98
 99    Private(const AnalyzerConfiguration* ic);
100    ~Private();
101    int nextFile(string& p, time_t& time) {
102        int r;
103        STRIGI_MUTEX_LOCK(&mutex);
104        r = nextFile();
105        if (r > 0) {
106            p.assign(path, r);
107            time = mtime;
108        }
109        STRIGI_MUTEX_UNLOCK(&mutex);
110        return r;
111    }
112    void startListing(const std::string&);
113    int nextFile();
114};
115FileLister::Private::Private(
116            const AnalyzerConfiguration* ic) :
117        config(ic) {
118    STRIGI_MUTEX_INIT(&mutex);
119    int nOpenDirs = 100;
120    dirs = (DIR**)malloc(sizeof(DIR*)*nOpenDirs);
121    dirsEnd = dirs + nOpenDirs;
122    len = (string::size_type*)malloc(sizeof(string::size_type)*nOpenDirs);
123    lenEnd = len + nOpenDirs;
124    curDir = dirs - 1;
125}
126void
127FileLister::Private::startListing(const string& dir){
128    listedDirs.clear();
129    curDir = dirs;
130    curLen = len;
131    string::size_type len = dir.length();
132    *curLen = len;
133    strcpy(path, dir.c_str());
134    if (len) {
135        if (path[len-1] != '/') {
136            path[len++] = '/';
137            path[len] = 0;
138            *curLen = len;
139        }
140        DIR* d = opendir(path);
141        if (d) {
142            *curDir = d;
143            listedDirs.insert (path);
144        } else {
145            curDir--;
146        }
147    } else {
148        curDir--;
149    }
150}
151FileLister::Private::~Private() {
152    while (curDir >= dirs) {
153        if (*curDir) {
154            closedir(*curDir);
155        }
156        curDir--;
157    }
158    free(dirs);
159    free(len);
160    STRIGI_MUTEX_DESTROY(&mutex);
161}
162int
163FileLister::Private::nextFile() {
164
165    while (curDir >= dirs) {
166        DIR* dir = *curDir;
167        string::size_type l = *curLen;
168        subdir = readdir(dir);
169        while (subdir) {
170            // skip the directories '.' and '..'
171            char c1 = subdir->d_name[0];
172            if (c1 == '.') {
173                char c2 = subdir->d_name[1];
174                if (c2 == '.' || c2 == '\0') {
175                    subdir = readdir(dir);
176                    continue;
177                }
178            }
179            strcpy(path + l, subdir->d_name);
180            string::size_type sl = l + strlen(subdir->d_name);
181            if (strigi_lstat(path, &dirstat) == 0) {
182                if (S_ISREG(dirstat.st_mode)) {
183                    if (config == 0 || config->indexFile(path, path+l)) {
184                        mtime = dirstat.st_mtime;
185                        return (int)sl;
186                    }
187                } else if (dirstat.st_mode & S_IFDIR && (config == 0
188                        || config->indexDir(path, path+l))) {
189                    mtime = dirstat.st_mtime;
190                    strcpy(this->path+sl, "/");
191                    DIR* d = opendir(path);
192                    if (d) {
193                        curDir++;
194                        curLen++;
195                        dir = *curDir = d;
196                        l = *curLen = sl+1;
197                        listedDirs.insert ( path);
198                    }
199                }
200            }
201            subdir = readdir(dir);
202        }
203        closedir(dir);
204        curDir--;
205        curLen--;
206    }
207    return -1;
208}
209FileLister::FileLister(const AnalyzerConfiguration* ic)
210    : p(new Private(ic)) {
211}
212FileLister::~FileLister() {
213    delete p;
214}
215void
216FileLister::startListing(const string& dir) {
217    p->startListing(dir);
218}
219int
220FileLister::nextFile(std::string& path, time_t& time) {
221    return p->nextFile(path, time);
222}
223int
224FileLister::nextFile(const char*& path, time_t& time) {
225    int r = p->nextFile();
226    if (r >= 0) {
227        time = p->mtime;
228        path = p->path;
229    }
230    return r;
231}
232void
233FileLister::skipTillAfter(const std::string& lastToSkip) {
234    int r = p->nextFile();
235    while (r >= 0 && p->path != lastToSkip) {
236        r = p->nextFile();
237    }
238}
239
240class DirLister::Private {
241public:
242    STRIGI_MUTEX_DEFINE(mutex);
243    list<string> todoPaths;
244    const AnalyzerConfiguration* const config;
245
246    Private(const AnalyzerConfiguration* ic) :config(ic) {}
247    int nextDir(std::string& path,
248        std::vector<std::pair<std::string, struct stat> >& dirs);
249};
250
251DirLister::DirLister(const AnalyzerConfiguration* ic)
252    : p(new Private(ic)) {
253    STRIGI_MUTEX_INIT(&p->mutex);
254}
255DirLister::~DirLister() {
256    STRIGI_MUTEX_DESTROY(&p->mutex);
257    delete p;
258}
259void
260DirLister::startListing(const string& dir) {
261    STRIGI_MUTEX_LOCK(&p->mutex);
262    p->todoPaths.push_back(dir);
263    STRIGI_MUTEX_UNLOCK(&p->mutex);
264}
265void
266DirLister::stopListing() {
267    STRIGI_MUTEX_LOCK(&p->mutex);
268    p->todoPaths.clear();
269    STRIGI_MUTEX_UNLOCK(&p->mutex);
270}
271int
272DirLister::Private::nextDir(std::string& path,
273        std::vector<std::pair<std::string, struct stat> >& dirs) {
274    string entryname;
275    string entrypath;
276    size_t entrypathlength;
277    // check if there are more directories to work on
278    // open the directory
279    STRIGI_MUTEX_LOCK(&mutex);
280    if (todoPaths.empty()) {
281        STRIGI_MUTEX_UNLOCK(&mutex);
282        return -1;
283    }
284    path.assign(todoPaths.front());
285    todoPaths.pop_front();
286    // Only unlock of the todo list is not empty.
287    // If the list is empty, other threads must wait for this thread to populate
288    // the list.
289    bool mutexLocked = true;
290    if (!todoPaths.empty()) {
291        STRIGI_MUTEX_UNLOCK(&mutex);
292        mutexLocked = false;
293    }
294    entrypathlength = path.length()+1;
295    entrypath.assign(path);
296    entrypath.append("/");
297    dirs.clear();
298    DIR* dir;
299    if (path.size()) {
300        dir = opendir(path.c_str());
301    } else {
302        // special case for root directory '/' on unix systems
303        dir = opendir("/");
304    }
305    if (!dir) {
306        int e = errno;
307        if (mutexLocked) {
308            STRIGI_MUTEX_UNLOCK(&mutex);
309        }
310        // if permission is denied, this is not an error
311        return (e == EACCES) ?0 :-1;
312    }
313    struct dirent* entry = readdir(dir);
314    struct stat entrystat;
315    while (entry) {
316        entryname.assign(entry->d_name);
317        if (entryname != "." && entryname != "..") {
318            entrypath.resize(entrypathlength);
319            entrypath.append(entryname);
320            if (strigi_lstat(entrypath.c_str(), &entrystat) == 0) {
321                if (S_ISDIR(entrystat.st_mode)) {
322                    if (config == 0 ||
323                            config->indexDir(
324                                entrypath.c_str(), entryname.c_str())) {
325                        if (!mutexLocked) {
326                            STRIGI_MUTEX_LOCK(&mutex);
327                        }
328                        todoPaths.push_back(entrypath);
329                        STRIGI_MUTEX_UNLOCK(&mutex);
330                        mutexLocked = false;
331                        dirs.push_back(make_pair<string,struct stat>(
332                            entrypath, entrystat));
333                    }
334                } else if (config == 0 || config->indexFile(entrypath.c_str(),
335                        entryname.c_str())) {
336                    dirs.push_back(
337                        make_pair<string,struct stat>(entrypath, entrystat));
338                }
339            }
340        }
341        entry = readdir(dir);
342    }
343    closedir(dir);
344    if (mutexLocked) {
345        STRIGI_MUTEX_UNLOCK(&mutex);
346    }
347    return 0;
348}
349int
350DirLister::nextDir(std::string& path,
351        std::vector<std::pair<std::string, struct stat> >& dirs) {
352    return p->nextDir(path, dirs);
353}
354void
355DirLister::skipTillAfter(const std::string& lastToSkip) {
356    string path;
357    vector<pair<string, struct stat> > dirs;
358    while (nextDir(path, dirs) >= 0 && path != lastToSkip) {}
359}