/strigi-0.7.7/libstreamanalyzer/lib/filelister.cpp
C++ | 359 lines | 309 code | 16 blank | 34 comment | 68 complexity | 51cdb953b3d4075e8ccdede7411f1c64 MD5 | raw file
Possible License(s): LGPL-2.0
1/* This file is part of Strigi Desktop Search
2 *
3 * Copyright (C) 2007 Jos van den Oever <jos@vandenoever.info>
4 * Copyright (C) 2007 Flavio Castelli <flavio.castelli@gmail.com>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 */
21
#include <strigi/filelister.h>
#include <strigi/strigiconfig.h>
#include <strigi/strigi_thread.h>
#include <strigi/analyzerconfiguration.h>
#include <set>
#include <list>
#include <new>
#include <iostream>
#include <sys/types.h>
#include <sys/stat.h>
#include <cstdlib>
#include <cstring>
#include "stgdirent.h" //dirent replacement (includes native if available)

#ifdef HAVE_DIRECT_H
#include <direct.h>
#endif
#include <errno.h>
39
40#ifdef _WIN32
41// windows does not have symbolic links, so stat() is fine
42#define strigi_lstat stat
43#else
44#define strigi_lstat lstat
45#endif
46
47using namespace std;
48using namespace Strigi;
49
namespace
{
    /*!
     * Normalises a path by removing one trailing separator, if present.
     *
     * When HAVE_WINDOWS_H is defined, every '\\' is first converted to '/'
     * and the first character (the drive letter) is lower-cased, so the
     * separator that gets stripped is always '/'.
     *
     * @param path string containing the path to normalise
     * @return the normalised path; an empty string when @p path is empty.
     *         A path that consists of a single "/" also becomes empty.
     */
    std::string fixPath (std::string path)
    {
        if ( path.empty() )
            return "";

    #ifdef HAVE_WINDOWS_H
        // Edit the string through its own interface: writing through the
        // pointer returned by c_str() is undefined behaviour.
        for (std::string::size_type i = 0; i < path.length(); ++i) {
            if ( path[i] == '\\' )
                path[i] = '/';
        }
        // tolower() requires a value representable as unsigned char.
        path[0] = static_cast<char>(
            tolower(static_cast<unsigned char>(path[0])));
    #endif

        const char separator = '/';

        // path is known to be non-empty here, so length() - 1 is valid.
        if ( path[path.length() - 1] == separator )
            path.erase(path.length() - 1);

        return path;
    }
}
82
83class FileLister::Private {
84public:
85 char path[10000];
86 STRIGI_MUTEX_DEFINE(mutex);
87 DIR** dirs;
88 DIR** dirsEnd;
89 DIR** curDir;
90 string::size_type* len;
91 string::size_type* lenEnd;
92 string::size_type* curLen;
93 time_t mtime;
94 struct dirent* subdir;
95 struct stat dirstat;
96 set<string> listedDirs;
97 const AnalyzerConfiguration* const config;
98
99 Private(const AnalyzerConfiguration* ic);
100 ~Private();
101 int nextFile(string& p, time_t& time) {
102 int r;
103 STRIGI_MUTEX_LOCK(&mutex);
104 r = nextFile();
105 if (r > 0) {
106 p.assign(path, r);
107 time = mtime;
108 }
109 STRIGI_MUTEX_UNLOCK(&mutex);
110 return r;
111 }
112 void startListing(const std::string&);
113 int nextFile();
114};
115FileLister::Private::Private(
116 const AnalyzerConfiguration* ic) :
117 config(ic) {
118 STRIGI_MUTEX_INIT(&mutex);
119 int nOpenDirs = 100;
120 dirs = (DIR**)malloc(sizeof(DIR*)*nOpenDirs);
121 dirsEnd = dirs + nOpenDirs;
122 len = (string::size_type*)malloc(sizeof(string::size_type)*nOpenDirs);
123 lenEnd = len + nOpenDirs;
124 curDir = dirs - 1;
125}
126void
127FileLister::Private::startListing(const string& dir){
128 listedDirs.clear();
129 curDir = dirs;
130 curLen = len;
131 string::size_type len = dir.length();
132 *curLen = len;
133 strcpy(path, dir.c_str());
134 if (len) {
135 if (path[len-1] != '/') {
136 path[len++] = '/';
137 path[len] = 0;
138 *curLen = len;
139 }
140 DIR* d = opendir(path);
141 if (d) {
142 *curDir = d;
143 listedDirs.insert (path);
144 } else {
145 curDir--;
146 }
147 } else {
148 curDir--;
149 }
150}
151FileLister::Private::~Private() {
152 while (curDir >= dirs) {
153 if (*curDir) {
154 closedir(*curDir);
155 }
156 curDir--;
157 }
158 free(dirs);
159 free(len);
160 STRIGI_MUTEX_DESTROY(&mutex);
161}
162int
163FileLister::Private::nextFile() {
164
165 while (curDir >= dirs) {
166 DIR* dir = *curDir;
167 string::size_type l = *curLen;
168 subdir = readdir(dir);
169 while (subdir) {
170 // skip the directories '.' and '..'
171 char c1 = subdir->d_name[0];
172 if (c1 == '.') {
173 char c2 = subdir->d_name[1];
174 if (c2 == '.' || c2 == '\0') {
175 subdir = readdir(dir);
176 continue;
177 }
178 }
179 strcpy(path + l, subdir->d_name);
180 string::size_type sl = l + strlen(subdir->d_name);
181 if (strigi_lstat(path, &dirstat) == 0) {
182 if (S_ISREG(dirstat.st_mode)) {
183 if (config == 0 || config->indexFile(path, path+l)) {
184 mtime = dirstat.st_mtime;
185 return (int)sl;
186 }
187 } else if (dirstat.st_mode & S_IFDIR && (config == 0
188 || config->indexDir(path, path+l))) {
189 mtime = dirstat.st_mtime;
190 strcpy(this->path+sl, "/");
191 DIR* d = opendir(path);
192 if (d) {
193 curDir++;
194 curLen++;
195 dir = *curDir = d;
196 l = *curLen = sl+1;
197 listedDirs.insert ( path);
198 }
199 }
200 }
201 subdir = readdir(dir);
202 }
203 closedir(dir);
204 curDir--;
205 curLen--;
206 }
207 return -1;
208}
209FileLister::FileLister(const AnalyzerConfiguration* ic)
210 : p(new Private(ic)) {
211}
212FileLister::~FileLister() {
213 delete p;
214}
215void
216FileLister::startListing(const string& dir) {
217 p->startListing(dir);
218}
219int
220FileLister::nextFile(std::string& path, time_t& time) {
221 return p->nextFile(path, time);
222}
223int
224FileLister::nextFile(const char*& path, time_t& time) {
225 int r = p->nextFile();
226 if (r >= 0) {
227 time = p->mtime;
228 path = p->path;
229 }
230 return r;
231}
232void
233FileLister::skipTillAfter(const std::string& lastToSkip) {
234 int r = p->nextFile();
235 while (r >= 0 && p->path != lastToSkip) {
236 r = p->nextFile();
237 }
238}
239
240class DirLister::Private {
241public:
242 STRIGI_MUTEX_DEFINE(mutex);
243 list<string> todoPaths;
244 const AnalyzerConfiguration* const config;
245
246 Private(const AnalyzerConfiguration* ic) :config(ic) {}
247 int nextDir(std::string& path,
248 std::vector<std::pair<std::string, struct stat> >& dirs);
249};
250
251DirLister::DirLister(const AnalyzerConfiguration* ic)
252 : p(new Private(ic)) {
253 STRIGI_MUTEX_INIT(&p->mutex);
254}
255DirLister::~DirLister() {
256 STRIGI_MUTEX_DESTROY(&p->mutex);
257 delete p;
258}
259void
260DirLister::startListing(const string& dir) {
261 STRIGI_MUTEX_LOCK(&p->mutex);
262 p->todoPaths.push_back(dir);
263 STRIGI_MUTEX_UNLOCK(&p->mutex);
264}
265void
266DirLister::stopListing() {
267 STRIGI_MUTEX_LOCK(&p->mutex);
268 p->todoPaths.clear();
269 STRIGI_MUTEX_UNLOCK(&p->mutex);
270}
271int
272DirLister::Private::nextDir(std::string& path,
273 std::vector<std::pair<std::string, struct stat> >& dirs) {
274 string entryname;
275 string entrypath;
276 size_t entrypathlength;
277 // check if there are more directories to work on
278 // open the directory
279 STRIGI_MUTEX_LOCK(&mutex);
280 if (todoPaths.empty()) {
281 STRIGI_MUTEX_UNLOCK(&mutex);
282 return -1;
283 }
284 path.assign(todoPaths.front());
285 todoPaths.pop_front();
286 // Only unlock of the todo list is not empty.
287 // If the list is empty, other threads must wait for this thread to populate
288 // the list.
289 bool mutexLocked = true;
290 if (!todoPaths.empty()) {
291 STRIGI_MUTEX_UNLOCK(&mutex);
292 mutexLocked = false;
293 }
294 entrypathlength = path.length()+1;
295 entrypath.assign(path);
296 entrypath.append("/");
297 dirs.clear();
298 DIR* dir;
299 if (path.size()) {
300 dir = opendir(path.c_str());
301 } else {
302 // special case for root directory '/' on unix systems
303 dir = opendir("/");
304 }
305 if (!dir) {
306 int e = errno;
307 if (mutexLocked) {
308 STRIGI_MUTEX_UNLOCK(&mutex);
309 }
310 // if permission is denied, this is not an error
311 return (e == EACCES) ?0 :-1;
312 }
313 struct dirent* entry = readdir(dir);
314 struct stat entrystat;
315 while (entry) {
316 entryname.assign(entry->d_name);
317 if (entryname != "." && entryname != "..") {
318 entrypath.resize(entrypathlength);
319 entrypath.append(entryname);
320 if (strigi_lstat(entrypath.c_str(), &entrystat) == 0) {
321 if (S_ISDIR(entrystat.st_mode)) {
322 if (config == 0 ||
323 config->indexDir(
324 entrypath.c_str(), entryname.c_str())) {
325 if (!mutexLocked) {
326 STRIGI_MUTEX_LOCK(&mutex);
327 }
328 todoPaths.push_back(entrypath);
329 STRIGI_MUTEX_UNLOCK(&mutex);
330 mutexLocked = false;
331 dirs.push_back(make_pair<string,struct stat>(
332 entrypath, entrystat));
333 }
334 } else if (config == 0 || config->indexFile(entrypath.c_str(),
335 entryname.c_str())) {
336 dirs.push_back(
337 make_pair<string,struct stat>(entrypath, entrystat));
338 }
339 }
340 }
341 entry = readdir(dir);
342 }
343 closedir(dir);
344 if (mutexLocked) {
345 STRIGI_MUTEX_UNLOCK(&mutex);
346 }
347 return 0;
348}
349int
350DirLister::nextDir(std::string& path,
351 std::vector<std::pair<std::string, struct stat> >& dirs) {
352 return p->nextDir(path, dirs);
353}
354void
355DirLister::skipTillAfter(const std::string& lastToSkip) {
356 string path;
357 vector<pair<string, struct stat> > dirs;
358 while (nextDir(path, dirs) >= 0 && path != lastToSkip) {}
359}