webcrawler.h
Go to the documentation of this file.
1 /***************************************************************************
2  SocNetV: Social Network Visualizer
3  version: 2.2
4  Written in Qt
5 
6  webcrawler.h - description
7  -------------------
8  copyright : (C) 2005-2017 by Dimitris B. Kalamaras
9  project site : http://socnetv.org
10 
11  ***************************************************************************/
12 
13 /*******************************************************************************
14 * This program is free software: you can redistribute it and/or modify *
15 * it under the terms of the GNU General Public License as published by *
16 * the Free Software Foundation, either version 3 of the License, or *
17 * (at your option) any later version. *
18 * *
19 * This program is distributed in the hope that it will be useful, *
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
22 * GNU General Public License for more details. *
23 * *
24 * You should have received a copy of the GNU General Public License *
25 * along with this program. If not, see <http://www.gnu.org/licenses/>. *
26 ********************************************************************************/
27 
28 #ifndef WEBCRAWLER_H
29 #define WEBCRAWLER_H
30 
31 #include <QNetworkReply>
32 #include <QUrl>
33 
// Forward declarations: in the visible part of this header these two types
// appear only as pointer members of WebCrawler_Spider.
34 class QNetworkAccessManager;
35 class QNetworkRequest;
36 
// NOTE(review): a using-directive at header scope is inherited by every file
// that includes this header. None of the declarations visible in this listing
// use a std name, so it looks removable -- confirm no includer relies on it.
37 using namespace std;
38 
39 
/**
 * QObject-derived class that declares the parsing side of the web crawler.
 *
 * Its parse() slot takes a QNetworkReply*, the same signature as the parse()
 * signal declared by WebCrawler_Spider; presumably the two are connected in
 * the implementation file -- TODO confirm.
 *
 * NOTE(review): this listing skips header lines 45 and 60-62. The member
 * index at the end of this page places m_maxPages (60), m_discoveredNodes (61)
 * and m_maxLinksPerPage (62) there; what line 45 holds is not shown.
 */
40 class WebCrawler_Parser : public QObject {
41  Q_OBJECT
42 public:
 // Takes the same five parameters as WebCrawler_Spider's constructor.
 // The seed is passed as a QString, while the m_seed member below is a QUrl.
43  WebCrawler_Parser(QString seed, int maxNodes, int maxLinksPerPage,
44  bool extLinks, bool intLinks);
46 public slots:
 // Slot receiving a network reply to parse.
47  void parse(QNetworkReply *reply);
 // Slot taking an int, a target URL and an enqueue flag.
 // Presumably s is the source node number of the link -- TODO confirm.
48  void newLink(int s, QUrl target, bool enqueue_to_frontier);
49 signals:
 // Carries a node number and a URL; signalMW defaults to false.
 // NOTE(review): the meaning of signalMW is not visible in this header.
50  void signalCreateNode(const int &no,
51  const QString &url,
52  const bool &signalMW=false);
 // Carries the source and target node numbers of an edge.
53  void signalCreateEdge (const int &source, const int &target);
54  void startSpider();
 // Carries a QString; presumably a status message -- TODO confirm.
55  void finished (QString);
56 private:
 // Byte buffer; presumably holds the reply body being parsed -- TODO confirm.
57  QByteArray ba;
 // Maps a URL to an int; presumably the node number assigned to that URL
 // -- TODO confirm against the implementation.
58  QMap <QUrl, int> knownUrls;
59  QUrl m_seed;
 // Presumably store the extLinks / intLinks constructor arguments -- TODO confirm.
63  bool m_extLinks, m_intLinks;
64 };
65 
66 
/**
 * QObject-derived class that declares the network side of the web crawler.
 *
 * It holds pointers to a QNetworkAccessManager, a QNetworkRequest and a
 * QNetworkReply, and declares a parse() signal carrying a QNetworkReply*.
 *
 * NOTE(review): this listing skips header lines 72 and 86-88. The member
 * index at the end of this page places m_maxPages (86), m_visitedNodes (87)
 * and m_maxLinksPerPage (88) there; what line 72 holds is not shown.
 */
67 class WebCrawler_Spider : public QObject {
68  Q_OBJECT
69 public:
 // Takes the same five parameters as WebCrawler_Parser's constructor.
70  WebCrawler_Spider(QString seed, int maxNodes, int maxLinksPerPage
71  ,bool extLinks, bool intLinks);
73 public slots:
 // Presumably issues the next HTTP request -- TODO confirm in the implementation.
74  void get();
 // Slot receiving a QNetworkReply*; presumably invoked when a request
 // completes -- TODO confirm which signal it is connected to.
75  void httpFinished(QNetworkReply *reply);
76 
77 signals:
 // Carries a network reply; same signature as WebCrawler_Parser::parse().
78  void parse(QNetworkReply *reply);
 // Carries a QString; presumably a status message -- TODO confirm.
79  void finished (QString);
80 private:
 // NOTE(review): ownership and lifetime of these three raw pointers are not
 // visible in this header -- check the constructor/destructor implementation.
81  QNetworkAccessManager *http;
82  QNetworkRequest *request;
83  QNetworkReply *reply;
84  QUrl currentUrl ;
 // Unlike WebCrawler_Parser::m_seed (a QUrl), the seed is kept as a QString here.
85  QString m_seed;
 // Presumably store the extLinks / intLinks constructor arguments -- TODO confirm.
89  bool m_extLinks, m_intLinks;
90 
91 };
92 
93 #endif
QNetworkAccessManager * http
Definition: webcrawler.h:81
WebCrawler_Spider
Definition: webcrawler.h:67
QNetworkReply * reply
Definition: webcrawler.h:83
QUrl currentUrl
Definition: webcrawler.h:84
int m_maxPages
Definition: webcrawler.h:60
bool m_intLinks
Definition: webcrawler.h:63
WebCrawler_Parser
Definition: webcrawler.h:40
int m_discoveredNodes
Definition: webcrawler.h:61
QNetworkRequest * request
Definition: webcrawler.h:82
QUrl m_seed
Definition: webcrawler.h:59
QByteArray ba
Definition: webcrawler.h:57
int m_maxPages
Definition: webcrawler.h:86
QString m_seed
Definition: webcrawler.h:85
QMap< QUrl, int > knownUrls
Definition: webcrawler.h:58
int m_maxLinksPerPage
Definition: webcrawler.h:62
int m_visitedNodes
Definition: webcrawler.h:87
bool m_intLinks
Definition: webcrawler.h:89
int m_maxLinksPerPage
Definition: webcrawler.h:88