Go to the documentation of this file.
31 #include <QNetworkReply>
51 QQueue<QUrl> *urlQueue,
53 const QStringList &urlPatternsIncluded,
54 const QStringList &urlPatternsExcluded,
55 const QStringList &linkClasses,
57 const int &maxLinksPerPage,
58 const bool &intLinks =
true,
59 const bool &childLinks =
true,
60 const bool &parentLinks =
false,
61 const bool &selfLinks =
false,
62 const bool &extLinksIncluded =
false,
63 const bool &extLinksCrawl =
false,
64 const bool &socialLinks =
false,
65 const int &delayBetween = 0
71 void parse(QNetworkReply *reply);
72 void newLink(
int s, QUrl target,
bool enqueue_to_frontier);
75 void signalCreateNode(
const int &no,
77 const bool &signalMW=
false);
78 void signalCreateEdge (
const int &source,
const int &target);
79 void signalStartSpider();
80 void finished (QString);
QStringList::const_iterator constIterator
Definition: webcrawler.h:106
QString urlPattern
Definition: webcrawler.h:102
int m_delayBetween
Definition: webcrawler.h:99
int m_maxUrls
Definition: webcrawler.h:86
bool m_urlPatternNotAllowed
Definition: webcrawler.h:108
bool m_extLinksIncluded
Definition: webcrawler.h:94
QUrl m_initialUrl
Definition: webcrawler.h:85
QQueue< QUrl > * m_urlQueue
Definition: webcrawler.h:83
QStringList m_urlPatternsExcluded
Definition: webcrawler.h:103
bool m_childLinks
Definition: webcrawler.h:91
bool m_urlPatternAllowed
Definition: webcrawler.h:107
bool m_selfLinks
Definition: webcrawler.h:93
bool m_urlIsSocial
Definition: webcrawler.h:97
int m_maxLinksPerPage
Definition: webcrawler.h:88
bool m_socialLinks
Definition: webcrawler.h:96
QStringList m_socialLinksExcluded
Definition: webcrawler.h:105
The WebCrawler class parses the HTML code it receives, locates URLs inside it and puts them into a URL qu...
Definition: webcrawler.h:45
bool m_intLinks
Definition: webcrawler.h:90
QStringList m_urlPatternsIncluded
Definition: webcrawler.h:101
QStringList m_linkClasses
Definition: webcrawler.h:104
bool m_parentLinks
Definition: webcrawler.h:92
bool m_linkClassAllowed
Definition: webcrawler.h:109
QMap< QUrl, int > knownUrls
Definition: webcrawler.h:84
int m_discoveredNodes
Definition: webcrawler.h:87
bool m_extLinksCrawl
Definition: webcrawler.h:95