20#include <QNetworkReply>
40 QQueue<QUrl> *urlQueue,
42 const QStringList &urlPatternsIncluded,
43 const QStringList &urlPatternsExcluded,
44 const QStringList &linkClasses,
46 const int &maxLinksPerPage,
47 const bool &intLinks =
true,
48 const bool &childLinks =
true,
49 const bool &parentLinks =
false,
50 const bool &selfLinks =
false,
51 const bool &extLinksIncluded =
false,
52 const bool &extLinksCrawl =
false,
53 const bool &socialLinks =
false,
54 const int &delayBetween = 0
60 void parse(QNetworkReply *reply);
61 void newLink(
int s, QUrl target,
bool enqueue_to_frontier);
66 const bool &signalMW=
false);
The WebCrawler class parses the HTML code it receives, locates URLs inside it and puts them into a URL queue...
Definition webcrawler.h:34
~WebCrawler()
Definition webcrawler.cpp:591
QStringList m_urlPatternsIncluded
Definition webcrawler.h:90
bool m_urlPatternNotAllowed
Definition webcrawler.h:97
QStringList m_socialLinksExcluded
Definition webcrawler.h:94
QString urlPattern
Definition webcrawler.h:91
bool m_childLinks
Definition webcrawler.h:80
void parse(QNetworkReply *reply)
Called from Graph when the network reply for a new page download has finished, to do the actual parsing ...
Definition webcrawler.cpp:116
void newLink(int s, QUrl target, bool enqueue_to_frontier)
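?? (no description in the original documentation; judging by the signature alone, this presumably handles a newly found link from node s to the target URL and optionally enqueues the target to the crawl frontier. TODO: confirm against webcrawler.cpp)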
Definition webcrawler.cpp:486
int m_maxUrls
Definition webcrawler.h:75
bool m_extLinksCrawl
Definition webcrawler.h:84
QUrl m_initialUrl
Definition webcrawler.h:74
int m_maxLinksPerPage
Definition webcrawler.h:77
bool m_parentLinks
Definition webcrawler.h:81
int m_discoveredNodes
Definition webcrawler.h:76
bool m_urlPatternAllowed
Definition webcrawler.h:96
int m_delayBetween
Definition webcrawler.h:88
bool m_extLinksIncluded
Definition webcrawler.h:83
QStringList m_linkClasses
Definition webcrawler.h:93
bool m_urlIsSocial
Definition webcrawler.h:86
QQueue< QUrl > * m_urlQueue
Definition webcrawler.h:72
bool m_intLinks
Definition webcrawler.h:79
bool m_socialLinks
Definition webcrawler.h:85
void signalCreateNode(const int &no, const QString &url, const bool &signalMW=false)
QStringList m_urlPatternsExcluded
Definition webcrawler.h:92
QStringList::const_iterator constIterator
Definition webcrawler.h:95
void signalCreateEdge(const int &source, const int &target)
bool m_selfLinks
Definition webcrawler.h:82
QMap< QUrl, int > knownUrls
Definition webcrawler.h:73
bool m_linkClassAllowed
Definition webcrawler.h:98