Code Documentation 3.1
Social Network Visualizer
Loading...
Searching...
No Matches
webcrawler.h
Go to the documentation of this file.
1
17#ifndef WEBCRAWLER_H
18#define WEBCRAWLER_H
19
20#include <QNetworkReply>
21#include <QQueue>
22
23QT_BEGIN_NAMESPACE
24class QUrl;
25QT_END_NAMESPACE
26
27using namespace std;
28
// WebCrawler: per the generated class summary, parses the HTML code it
// receives, locates URLs inside it and puts them into a URL queue.
// NOTE(review): this is a rendered listing, not the raw header. Source lines
// 39, 57, 68, 74-77, 79-86, 88, 90, 92, 94 and 96-98 are not shown here;
// consult the real webcrawler.h before editing.
34class WebCrawler : public QObject {
35 Q_OBJECT
36 // QThread wc_spiderThread;
37public:
38
// Parameter list of a public member whose name line (39) is missing from this
// listing -- presumably the WebCrawler constructor; confirm in webcrawler.h.
// The parameter names mirror the m_* members listed in the member index
// (e.g. intLinks / m_intLinks, delayBetween / m_delayBetween).
40 QQueue<QUrl> *urlQueue,
41 const QUrl &startUrl,
42 const QStringList &urlPatternsIncluded,
43 const QStringList &urlPatternsExcluded,
44 const QStringList &linkClasses,
45 const int &maxNodes,
46 const int &maxLinksPerPage,
47 const bool &intLinks = true,
48 const bool &childLinks = true,
49 const bool &parentLinks = false,
50 const bool &selfLinks = false,
51 const bool &extLinksIncluded = false,
52 const bool &extLinksCrawl = false,
53 const bool &socialLinks = false,
54 const int &delayBetween = 0
55 );
56
// Line 57 is not shown; the member index lists ~WebCrawler() (defined at
// webcrawler.cpp:591), so the destructor is presumably declared there.
58
59public slots:
// Per the generated docs: called from Graph when a network reply for a new
// page download has finished, to do the actual parsing.
60 void parse(QNetworkReply *reply);
// The generated docs carry no description for this slot ("??").
// Implementation is at webcrawler.cpp:486.
61 void newLink(int s, QUrl target, bool enqueue_to_frontier);
62
63signals:
// Signal carrying a node number, its URL string and a flag (default false).
64 void signalCreateNode(const int &no,
65 const QString &url,
66 const bool &signalMW=false);
// Signal carrying the numbers of an edge's source and target.
67 void signalCreateEdge (const int &source, const int &target);
// Line 68 is not shown; the member index lists signalStartSpider(), which is
// presumably declared there.
69 void finished (QString);
70
71private:
// Pointer to a queue of URLs; presumably the urlQueue passed in above --
// confirm in webcrawler.cpp.
72 QQueue<QUrl> *m_urlQueue;
// Map from URL to an int; presumably the node number of an already seen URL
// -- confirm in webcrawler.cpp.
73 QMap <QUrl, int> knownUrls;
// Not shown (per member index): 74 QUrl m_initialUrl, 75 int m_maxUrls,
// 76 int m_discoveredNodes, 77 int m_maxLinksPerPage.
78
// Not shown (per member index): bools 79 m_intLinks, 80 m_childLinks,
// 81 m_parentLinks, 82 m_selfLinks, 83 m_extLinksIncluded,
// 84 m_extLinksCrawl, 85 m_socialLinks, 86 m_urlIsSocial.
87
// Not shown (per member index): 88 int m_delayBetween.
89
// Not shown (per member index): 90 QStringList m_urlPatternsIncluded.
91 QString urlPattern;
// Not shown (per member index): 92 QStringList m_urlPatternsExcluded.
93 QStringList m_linkClasses;
// Not shown (per member index): 94 QStringList m_socialLinksExcluded.
95 QStringList::const_iterator constIterator;
// Not shown (per member index): bools 96 m_urlPatternAllowed,
// 97 m_urlPatternNotAllowed, 98 m_linkClassAllowed.
99};
100
101
102
103#endif
The WebCrawler class parses HTML code it receives, locates URLs inside it and puts them into a URL queue...
Definition webcrawler.h:34
~WebCrawler()
Definition webcrawler.cpp:591
QStringList m_urlPatternsIncluded
Definition webcrawler.h:90
bool m_urlPatternNotAllowed
Definition webcrawler.h:97
QStringList m_socialLinksExcluded
Definition webcrawler.h:94
QString urlPattern
Definition webcrawler.h:91
bool m_childLinks
Definition webcrawler.h:80
void finished(QString)
void parse(QNetworkReply *reply)
Called from Graph when a network reply for a new page download has finished to do the actual parsing ...
Definition webcrawler.cpp:116
void newLink(int s, QUrl target, bool enqueue_to_frontier)
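(No description in the original documentation.) Judging by the signature only: handles a link from source node s to the URL target; enqueue_to_frontier presumably controls whether target is also queued for crawling. Confirm against webcrawler.cpp:486.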
Definition webcrawler.cpp:486
int m_maxUrls
Definition webcrawler.h:75
bool m_extLinksCrawl
Definition webcrawler.h:84
QUrl m_initialUrl
Definition webcrawler.h:74
int m_maxLinksPerPage
Definition webcrawler.h:77
bool m_parentLinks
Definition webcrawler.h:81
int m_discoveredNodes
Definition webcrawler.h:76
bool m_urlPatternAllowed
Definition webcrawler.h:96
int m_delayBetween
Definition webcrawler.h:88
bool m_extLinksIncluded
Definition webcrawler.h:83
QStringList m_linkClasses
Definition webcrawler.h:93
bool m_urlIsSocial
Definition webcrawler.h:86
QQueue< QUrl > * m_urlQueue
Definition webcrawler.h:72
bool m_intLinks
Definition webcrawler.h:79
void signalStartSpider()
bool m_socialLinks
Definition webcrawler.h:85
void signalCreateNode(const int &no, const QString &url, const bool &signalMW=false)
QStringList m_urlPatternsExcluded
Definition webcrawler.h:92
QStringList::const_iterator constIterator
Definition webcrawler.h:95
void signalCreateEdge(const int &source, const int &target)
bool m_selfLinks
Definition webcrawler.h:82
QMap< QUrl, int > knownUrls
Definition webcrawler.h:73
bool m_linkClassAllowed
Definition webcrawler.h:98