PageRenderTime 53ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/strigi-0.7.7/libstreamanalyzer/lib/filelister.cpp

#
C++ | 359 lines | 309 code | 16 blank | 34 comment | 68 complexity | 51cdb953b3d4075e8ccdede7411f1c64 MD5 | raw file
Possible License(s): LGPL-2.0
  1. /* This file is part of Strigi Desktop Search
  2. *
  3. * Copyright (C) 2007 Jos van den Oever <jos@vandenoever.info>
  4. * Copyright (C) 2007 Flavio Castelli <flavio.castelli@gmail.com>
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Library General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2 of the License, or (at your option) any later version.
  10. *
  11. * This library is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Library General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Library General Public License
  17. * along with this library; see the file COPYING.LIB. If not, write to
  18. * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  19. * Boston, MA 02110-1301, USA.
  20. */
  21. #include <strigi/filelister.h>
  22. #include <strigi/strigiconfig.h>
  23. #include <strigi/strigi_thread.h>
  24. #include <strigi/analyzerconfiguration.h>
  25. #include <set>
  26. #include <list>
  27. #include <iostream>
  28. #include <sys/types.h>
  29. #include <sys/stat.h>
  30. #include <cstdlib>
  31. #include <cstring>
  32. #include "stgdirent.h" //dirent replacement (includes native if available)
  33. #ifdef HAVE_DIRECT_H
  34. #include <direct.h>
  35. #endif
  36. #include <errno.h>
  37. #ifdef _WIN32
  38. // windows does not have symbolic links, so stat() is fine
  39. #define strigi_lstat stat
  40. #else
  41. #define strigi_lstat lstat
  42. #endif
  43. using namespace std;
  44. using namespace Strigi;
  namespace
  {
  /*!
   * @brief Normalizes a path and strips one trailing separator.
   *
   * When HAVE_WINDOWS_H is defined, every backslash is first converted to a
   * forward slash and the first character is lower-cased. On all platforms a
   * single trailing '/' is then removed.
   *
   * @param path string containing the path to fix
   * @return the fixed path; an empty string when @p path is empty. A path
   *         consisting only of "/" also yields an empty string.
   */
  std::string fixPath (std::string path)
  {
      if (path.empty())
          return "";
  #ifdef HAVE_WINDOWS_H
      // Edit the by-value copy in place instead of writing through c_str().
      for (std::string::size_type i = 0; i < path.length(); ++i) {
          if (path[i] == '\\')
              path[i] = '/';
      }
      // tolower() requires a value representable as unsigned char.
      path[0] = static_cast<char>(
          tolower(static_cast<unsigned char>(path[0])));
  #endif
      const char separator = '/';
      if (path[path.length() - 1] == separator)
          path.erase(path.length() - 1);
      return path;
  }
  }
  72. class FileLister::Private {
  73. public:
  74. char path[10000];
  75. STRIGI_MUTEX_DEFINE(mutex);
  76. DIR** dirs;
  77. DIR** dirsEnd;
  78. DIR** curDir;
  79. string::size_type* len;
  80. string::size_type* lenEnd;
  81. string::size_type* curLen;
  82. time_t mtime;
  83. struct dirent* subdir;
  84. struct stat dirstat;
  85. set<string> listedDirs;
  86. const AnalyzerConfiguration* const config;
  87. Private(const AnalyzerConfiguration* ic);
  88. ~Private();
  89. int nextFile(string& p, time_t& time) {
  90. int r;
  91. STRIGI_MUTEX_LOCK(&mutex);
  92. r = nextFile();
  93. if (r > 0) {
  94. p.assign(path, r);
  95. time = mtime;
  96. }
  97. STRIGI_MUTEX_UNLOCK(&mutex);
  98. return r;
  99. }
  100. void startListing(const std::string&);
  101. int nextFile();
  102. };
  103. FileLister::Private::Private(
  104. const AnalyzerConfiguration* ic) :
  105. config(ic) {
  106. STRIGI_MUTEX_INIT(&mutex);
  107. int nOpenDirs = 100;
  108. dirs = (DIR**)malloc(sizeof(DIR*)*nOpenDirs);
  109. dirsEnd = dirs + nOpenDirs;
  110. len = (string::size_type*)malloc(sizeof(string::size_type)*nOpenDirs);
  111. lenEnd = len + nOpenDirs;
  112. curDir = dirs - 1;
  113. }
  114. void
  115. FileLister::Private::startListing(const string& dir){
  116. listedDirs.clear();
  117. curDir = dirs;
  118. curLen = len;
  119. string::size_type len = dir.length();
  120. *curLen = len;
  121. strcpy(path, dir.c_str());
  122. if (len) {
  123. if (path[len-1] != '/') {
  124. path[len++] = '/';
  125. path[len] = 0;
  126. *curLen = len;
  127. }
  128. DIR* d = opendir(path);
  129. if (d) {
  130. *curDir = d;
  131. listedDirs.insert (path);
  132. } else {
  133. curDir--;
  134. }
  135. } else {
  136. curDir--;
  137. }
  138. }
  139. FileLister::Private::~Private() {
  140. while (curDir >= dirs) {
  141. if (*curDir) {
  142. closedir(*curDir);
  143. }
  144. curDir--;
  145. }
  146. free(dirs);
  147. free(len);
  148. STRIGI_MUTEX_DESTROY(&mutex);
  149. }
  150. int
  151. FileLister::Private::nextFile() {
  152. while (curDir >= dirs) {
  153. DIR* dir = *curDir;
  154. string::size_type l = *curLen;
  155. subdir = readdir(dir);
  156. while (subdir) {
  157. // skip the directories '.' and '..'
  158. char c1 = subdir->d_name[0];
  159. if (c1 == '.') {
  160. char c2 = subdir->d_name[1];
  161. if (c2 == '.' || c2 == '\0') {
  162. subdir = readdir(dir);
  163. continue;
  164. }
  165. }
  166. strcpy(path + l, subdir->d_name);
  167. string::size_type sl = l + strlen(subdir->d_name);
  168. if (strigi_lstat(path, &dirstat) == 0) {
  169. if (S_ISREG(dirstat.st_mode)) {
  170. if (config == 0 || config->indexFile(path, path+l)) {
  171. mtime = dirstat.st_mtime;
  172. return (int)sl;
  173. }
  174. } else if (dirstat.st_mode & S_IFDIR && (config == 0
  175. || config->indexDir(path, path+l))) {
  176. mtime = dirstat.st_mtime;
  177. strcpy(this->path+sl, "/");
  178. DIR* d = opendir(path);
  179. if (d) {
  180. curDir++;
  181. curLen++;
  182. dir = *curDir = d;
  183. l = *curLen = sl+1;
  184. listedDirs.insert ( path);
  185. }
  186. }
  187. }
  188. subdir = readdir(dir);
  189. }
  190. closedir(dir);
  191. curDir--;
  192. curLen--;
  193. }
  194. return -1;
  195. }
  196. FileLister::FileLister(const AnalyzerConfiguration* ic)
  197. : p(new Private(ic)) {
  198. }
  199. FileLister::~FileLister() {
  200. delete p;
  201. }
  202. void
  203. FileLister::startListing(const string& dir) {
  204. p->startListing(dir);
  205. }
  206. int
  207. FileLister::nextFile(std::string& path, time_t& time) {
  208. return p->nextFile(path, time);
  209. }
  210. int
  211. FileLister::nextFile(const char*& path, time_t& time) {
  212. int r = p->nextFile();
  213. if (r >= 0) {
  214. time = p->mtime;
  215. path = p->path;
  216. }
  217. return r;
  218. }
  219. void
  220. FileLister::skipTillAfter(const std::string& lastToSkip) {
  221. int r = p->nextFile();
  222. while (r >= 0 && p->path != lastToSkip) {
  223. r = p->nextFile();
  224. }
  225. }
  226. class DirLister::Private {
  227. public:
  228. STRIGI_MUTEX_DEFINE(mutex);
  229. list<string> todoPaths;
  230. const AnalyzerConfiguration* const config;
  231. Private(const AnalyzerConfiguration* ic) :config(ic) {}
  232. int nextDir(std::string& path,
  233. std::vector<std::pair<std::string, struct stat> >& dirs);
  234. };
  235. DirLister::DirLister(const AnalyzerConfiguration* ic)
  236. : p(new Private(ic)) {
  237. STRIGI_MUTEX_INIT(&p->mutex);
  238. }
  239. DirLister::~DirLister() {
  240. STRIGI_MUTEX_DESTROY(&p->mutex);
  241. delete p;
  242. }
  243. void
  244. DirLister::startListing(const string& dir) {
  245. STRIGI_MUTEX_LOCK(&p->mutex);
  246. p->todoPaths.push_back(dir);
  247. STRIGI_MUTEX_UNLOCK(&p->mutex);
  248. }
  249. void
  250. DirLister::stopListing() {
  251. STRIGI_MUTEX_LOCK(&p->mutex);
  252. p->todoPaths.clear();
  253. STRIGI_MUTEX_UNLOCK(&p->mutex);
  254. }
  255. int
  256. DirLister::Private::nextDir(std::string& path,
  257. std::vector<std::pair<std::string, struct stat> >& dirs) {
  258. string entryname;
  259. string entrypath;
  260. size_t entrypathlength;
  261. // check if there are more directories to work on
  262. // open the directory
  263. STRIGI_MUTEX_LOCK(&mutex);
  264. if (todoPaths.empty()) {
  265. STRIGI_MUTEX_UNLOCK(&mutex);
  266. return -1;
  267. }
  268. path.assign(todoPaths.front());
  269. todoPaths.pop_front();
  270. // Only unlock of the todo list is not empty.
  271. // If the list is empty, other threads must wait for this thread to populate
  272. // the list.
  273. bool mutexLocked = true;
  274. if (!todoPaths.empty()) {
  275. STRIGI_MUTEX_UNLOCK(&mutex);
  276. mutexLocked = false;
  277. }
  278. entrypathlength = path.length()+1;
  279. entrypath.assign(path);
  280. entrypath.append("/");
  281. dirs.clear();
  282. DIR* dir;
  283. if (path.size()) {
  284. dir = opendir(path.c_str());
  285. } else {
  286. // special case for root directory '/' on unix systems
  287. dir = opendir("/");
  288. }
  289. if (!dir) {
  290. int e = errno;
  291. if (mutexLocked) {
  292. STRIGI_MUTEX_UNLOCK(&mutex);
  293. }
  294. // if permission is denied, this is not an error
  295. return (e == EACCES) ?0 :-1;
  296. }
  297. struct dirent* entry = readdir(dir);
  298. struct stat entrystat;
  299. while (entry) {
  300. entryname.assign(entry->d_name);
  301. if (entryname != "." && entryname != "..") {
  302. entrypath.resize(entrypathlength);
  303. entrypath.append(entryname);
  304. if (strigi_lstat(entrypath.c_str(), &entrystat) == 0) {
  305. if (S_ISDIR(entrystat.st_mode)) {
  306. if (config == 0 ||
  307. config->indexDir(
  308. entrypath.c_str(), entryname.c_str())) {
  309. if (!mutexLocked) {
  310. STRIGI_MUTEX_LOCK(&mutex);
  311. }
  312. todoPaths.push_back(entrypath);
  313. STRIGI_MUTEX_UNLOCK(&mutex);
  314. mutexLocked = false;
  315. dirs.push_back(make_pair<string,struct stat>(
  316. entrypath, entrystat));
  317. }
  318. } else if (config == 0 || config->indexFile(entrypath.c_str(),
  319. entryname.c_str())) {
  320. dirs.push_back(
  321. make_pair<string,struct stat>(entrypath, entrystat));
  322. }
  323. }
  324. }
  325. entry = readdir(dir);
  326. }
  327. closedir(dir);
  328. if (mutexLocked) {
  329. STRIGI_MUTEX_UNLOCK(&mutex);
  330. }
  331. return 0;
  332. }
  333. int
  334. DirLister::nextDir(std::string& path,
  335. std::vector<std::pair<std::string, struct stat> >& dirs) {
  336. return p->nextDir(path, dirs);
  337. }
  338. void
  339. DirLister::skipTillAfter(const std::string& lastToSkip) {
  340. string path;
  341. vector<pair<string, struct stat> > dirs;
  342. while (nextDir(path, dirs) >= 0 && path != lastToSkip) {}
  343. }