#include <webcrawler.h>
|
void | parse (QNetworkReply *reply) |
| Called when NetworkManager has finished. This method does the actual parsing of each page's HTML source from the reply byte array. First, we read the entire HTTP reply into a QString called page. Then we parse the page string, searching for URL substrings. More...
|
|
void | newLink (int s, QUrl target, bool enqueue_to_frontier) |
| Signals node creation. Called from wc_parser::load(). More...
|
|
|
| WebCrawler_Parser () |
| Constructor called from the parent thread; does nothing. More...
|
|
| ~WebCrawler_Parser () |
|
void | load (const QString &seed, const QStringList &urlPatternsIncluded, const QStringList &urlPatternsExcluded, const QStringList &linkClasses, const int &maxNodes, const int &maxLinksPerPage, const bool &intLinks, const bool &childLinks, const bool &parentLinks, const bool &selfLinks, const bool &extLinksIncluded, const bool &extLinksCrawl, const bool &socialLinks) |
| Called from the parent Graph thread. Initializes variables. More...
|
|
◆ WebCrawler_Parser()
WebCrawler_Parser::WebCrawler_Parser |
( |
| ) |
|
Constructor called from the parent thread; does nothing.
◆ ~WebCrawler_Parser()
WebCrawler_Parser::~WebCrawler_Parser |
( |
| ) |
|
◆ finished
void WebCrawler_Parser::finished |
( |
QString |
| ) |
|
|
signal |
◆ load()
void WebCrawler_Parser::load |
( |
const QString & |
url, |
|
|
const QStringList & |
urlPatternsIncluded, |
|
|
const QStringList & |
urlPatternsExcluded, |
|
|
const QStringList & |
linkClasses, |
|
|
const int & |
maxN, |
|
|
const int & |
maxLinksPerPage, |
|
|
const bool & |
intLinks, |
|
|
const bool & |
childLinks, |
|
|
const bool & |
parentLinks, |
|
|
const bool & |
selfLinks, |
|
|
const bool & |
extLinksIncluded, |
|
|
const bool & |
extLinksCrawl, |
|
|
const bool & |
socialLinks |
|
) |
| |
Called from the parent Graph thread. Initializes variables.
- Parameters
-
url | |
maxN | |
maxLinksPerPage | |
extLinksIncluded | |
extLinksCrawl | |
intLinks | |
◆ newLink
void WebCrawler_Parser::newLink |
( |
int |
s, |
|
|
QUrl |
target, |
|
|
bool |
enqueue_to_frontier |
|
) |
| |
|
slot |
Signals node creation. Called from wc_parser::load().
- Parameters
-
s | |
target | |
enqueue_to_frontier | |
◆ parse
void WebCrawler_Parser::parse |
( |
QNetworkReply * |
reply | ) |
|
|
slot |
Called when NetworkManager has finished. This method does the actual parsing of each page's HTML source from the reply byte array. First, we read the entire HTTP reply into a QString called page. Then we parse the page string, searching for URL substrings.
- Parameters
-
reply | the HTTP reply whose contents are read into the page string and parsed |
◆ signalCreateEdge
void WebCrawler_Parser::signalCreateEdge |
( |
const int & |
source, |
|
|
const int & |
target |
|
) |
| |
|
signal |
◆ signalCreateNode
void WebCrawler_Parser::signalCreateNode |
( |
const int & |
no, |
|
|
const QString & |
url, |
|
|
const bool & |
signalMW = false |
|
) |
| |
|
signal |
◆ startSpider
void WebCrawler_Parser::startSpider |
( |
| ) |
|
|
signal |
◆ ba
QByteArray WebCrawler_Parser::ba |
|
private |
◆ constIterator
QStringList::const_iterator WebCrawler_Parser::constIterator |
|
private |
◆ knownUrls
QMap<QUrl, int> WebCrawler_Parser::knownUrls |
|
private |
◆ m_childLinks
bool WebCrawler_Parser::m_childLinks |
|
private |
◆ m_discoveredNodes
int WebCrawler_Parser::m_discoveredNodes |
|
private |
◆ m_extLinksCrawl
bool WebCrawler_Parser::m_extLinksCrawl |
|
private |
◆ m_extLinksIncluded
bool WebCrawler_Parser::m_extLinksIncluded |
|
private |
◆ m_intLinks
bool WebCrawler_Parser::m_intLinks |
|
private |
◆ m_linkClassAllowed
bool WebCrawler_Parser::m_linkClassAllowed |
|
private |
◆ m_linkClasses
QStringList WebCrawler_Parser::m_linkClasses |
|
private |
◆ m_maxLinksPerPage
int WebCrawler_Parser::m_maxLinksPerPage |
|
private |
◆ m_maxNodes
int WebCrawler_Parser::m_maxNodes |
|
private |
◆ m_parentLinks
bool WebCrawler_Parser::m_parentLinks |
|
private |
◆ m_seed
QUrl WebCrawler_Parser::m_seed |
|
private |
◆ m_selfLinks
bool WebCrawler_Parser::m_selfLinks |
|
private |
◆ m_socialLinks
bool WebCrawler_Parser::m_socialLinks |
|
private |
◆ m_socialLinksExcluded
QStringList WebCrawler_Parser::m_socialLinksExcluded |
|
private |
◆ m_urlIsSocial
bool WebCrawler_Parser::m_urlIsSocial |
|
private |
◆ m_urlPatternAllowed
bool WebCrawler_Parser::m_urlPatternAllowed |
|
private |
◆ m_urlPatternNotAllowed
bool WebCrawler_Parser::m_urlPatternNotAllowed |
|
private |
◆ m_urlPatternsExcluded
QStringList WebCrawler_Parser::m_urlPatternsExcluded |
|
private |
◆ m_urlPatternsIncluded
QStringList WebCrawler_Parser::m_urlPatternsIncluded |
|
private |
◆ urlPattern
QString WebCrawler_Parser::urlPattern |
|
private |
The documentation for this class was generated from the following files: