
Commit

moved to headless chrome
segment-srl committed Nov 14, 2018
1 parent dcc0078 commit 4e98aa1
Showing 21 changed files with 3,154 additions and 34 deletions.
5 changes: 4 additions & 1 deletion .gitignore
@@ -1,5 +1,8 @@
*.py[cod]
tmp.py
.idea
*.db
tmp
node_modules/
.*
!/.gitignore
package-lock.json
12 changes: 6 additions & 6 deletions README.md
@@ -3,30 +3,30 @@
Htcap is a web application scanner able to crawl single page applications (SPAs) recursively by intercepting ajax calls and DOM changes.
Htcap is not just another vulnerability scanner: it focuses mainly on the crawling process and uses external tools to discover vulnerabilities. It's designed to be a tool for both manual and automated penetration testing of modern web applications.

This is the very first release that uses headless Chrome instead of PhantomJS.
Htcap's JavaScript crawling engine has been rewritten to take advantage of the new async/await features of ECMAScript and has been converted to a Node.js module built on top of [Puppeteer](https://github.com/GoogleChrome/puppeteer).

More info at [htcap.org](http://htcap.org).

## SETUP

### Requirements

1. Python 2.7
2. PhantomJS v2
2. Node.js and npm
3. Sqlmap (for sqlmap scanner module)
4. Arachni (for arachni scanner module)

### Download and Run

```console
$ git clone https://github.com/segment-srl/htcap.git htcap
$ git clone https://github.com/fcavallarin/htcap.git htcap
$ htcap/htcap.py
```

PhantomJs can be downloaded [here](http://phantomjs.org//download.html). It comes as a self-contained executable with all libraries linked statically, so there is no need to install or compile anything else.


## DOCUMENTATION

Documentation, examples and demos can be found at the official website [http://htcap.org](http://htcap.org).
Documentation, examples and demos can be found at the official website [https://htcap.org](https://htcap.org).

## LICENSE

43 changes: 32 additions & 11 deletions core/crawl/crawler.py
@@ -59,7 +59,7 @@ def __init__(self, argv):
self.request_patterns = []

self.defaults = {
"useragent": 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36',
"useragent": 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3582.0 Safari/537.36',
"num_threads": 10,
"max_redirects": 10,
"out_file_overwrite": False,
@@ -75,7 +75,9 @@
"max_post_depth": 10,
"override_timeout_functions": True,
'crawl_forms': True, # only if mode == CRAWLMODE_AGGRESSIVE
'deduplicate_pages': True
'deduplicate_pages': True,
'use_legacy_browser': False,
'headless_chrome': True
}


@@ -123,6 +125,8 @@ def usage(self):
" -O dont't override timeout functions (setTimeout, setInterval)\n"
" -K keep elements in the DOM (prevent removal)\n"
" -e disable hEuristic page deduplication\n"
" -L use Legacy browser (phantomjs) instead of chrome\n"
" -l do not run chrome in headless mode\n"
)


@@ -373,10 +377,11 @@ def main(self, argv):
Shared.th_condition = threading.Condition()
Shared.main_condition = threading.Condition()


probe_cmd = get_phantomjs_cmd()
if not probe_cmd:
print "Error: unable to find phantomjs executable"
deps_errors = check_dependences(self.base_dir)
if len(deps_errors) > 0:
print "Dependences errors: "
for err in deps_errors:
print " %s" % err
sys.exit(1)

start_cookies = []
@@ -398,7 +403,7 @@ def main(self, argv):
user_script = None

try:
opts, args = getopt.getopt(argv, 'hc:t:jn:x:A:p:d:BGR:U:wD:s:m:C:qr:SIHFP:Ovu:e')
opts, args = getopt.getopt(argv, 'hc:t:jn:x:A:p:d:BGR:U:wD:s:m:C:qr:SIHFP:Ovu:eLl')
except getopt.GetoptError as err:
print str(err)
sys.exit(1)
@@ -491,6 +496,15 @@ def main(self, argv):
sys.exit(1)
elif o == "-e":
Shared.options['deduplicate_pages'] = False
elif o == "-L":
Shared.options['use_legacy_browser'] = True
elif o == "-l":
Shared.options['headless_chrome'] = False

probe_cmd = get_phantomjs_cmd() if Shared.options['use_legacy_browser'] else get_node_cmd()
if not probe_cmd: # maybe useless
print "Error: unable to find node (or phantomjs) executable"
sys.exit(1)


if Shared.options['scope'] != CRAWLSCOPE_DOMAIN and len(Shared.allowed_domains) > 0:
@@ -508,11 +522,18 @@
if Shared.options['mode'] == CRAWLMODE_PASSIVE:
probe_options.append("-t") # dont trigger events

if Shared.options['proxy']:
probe_cmd.append("--proxy-type=%s" % Shared.options['proxy']['proto'])
probe_cmd.append("--proxy=%s:%s" % (Shared.options['proxy']['host'], Shared.options['proxy']['port']))

probe_cmd.append(self.base_dir + 'probe/analyze.js')
if Shared.options['use_legacy_browser']:
if Shared.options['proxy']:
probe_cmd.append("--proxy-type=%s" % Shared.options['proxy']['proto'])
probe_cmd.append("--proxy=%s:%s" % (Shared.options['proxy']['host'], Shared.options['proxy']['port']))
probe_cmd.append(self.base_dir + 'probe/analyze.js')
else:
if Shared.options['proxy']:
probe_options.extend(["-y", "%s:%s:%s" % (Shared.options['proxy']['proto'], Shared.options['proxy']['host'], Shared.options['proxy']['port'])])
if not Shared.options['headless_chrome']:
probe_options.append("-l")
probe_cmd.append(self.base_dir + 'probe/chrome-probe/analyze.js')


if len(Shared.excluded_urls) > 0:
13 changes: 7 additions & 6 deletions core/crawl/crawler_thread.py
@@ -98,7 +98,7 @@ def load_probe_json(self, jsn):
try:
return json.loads(jsn)
except Exception:
#print "-- JSON DECODE ERROR %s" % jsn
print "-- JSON DECODE ERROR %s" % jsn
raise


@@ -146,15 +146,16 @@ def send_probe(self, request, errors):

# print cmd_to_str(Shared.probe_cmd + params)
# print ""

cmd = CommandExecutor(Shared.probe_cmd + params, True)
jsn, err = cmd.execute(Shared.options['process_timeout'] + 2)

cmd = CommandExecutor(Shared.probe_cmd + params)
jsn = cmd.execute(Shared.options['process_timeout'] + 2)
if err:
print err
jsn = None

if jsn == None:
errors.append(ERROR_PROBEKILLED)
# time.sleep(self.process_retries_interval) # ... ???
# retries -= 1
# continue
break


4 changes: 2 additions & 2 deletions core/crawl/lib/utils.py
@@ -97,8 +97,8 @@ def request_is_crawlable(request):
return False

types = [REQTYPE_LINK, REQTYPE_REDIRECT]
if Shared.options['mode'] == CRAWLMODE_AGGRESSIVE and Shared.options['crawl_forms']:
types.append(REQTYPE_FORM)
# if Shared.options['mode'] == CRAWLMODE_AGGRESSIVE and Shared.options['crawl_forms']:
# types.append(REQTYPE_FORM)

return request.type in types and re.match("^https?://", request.url, re.I)

167 changes: 167 additions & 0 deletions core/crawl/probe/chrome-probe/analyze.js
@@ -0,0 +1,167 @@
/*
HTCAP - 1.2
http://htcap.org
Author: [email protected]
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.
*/


"use strict";

const htcap = require("./htcap");
const utils = require('./utils');
const process = require('process');


var sleep = function(n){
return new Promise(resolve => {
setTimeout(resolve, n);
});
};



var argv = utils.parseArgs(process.argv, "hVaftUJdICc:MSEp:Tsx:A:r:mHX:PD:R:Oi:u:vy:l", {});
var options = argv.opts

var targetUrl = argv.args[0];



if(!targetUrl){
utils.usage();
process.exit(-1);
}

targetUrl = targetUrl.trim();
if(targetUrl.length < 4 || targetUrl.substring(0,4).toLowerCase() != "http"){
targetUrl = "http://" + targetUrl;
}



htcap.launch(targetUrl, options).then( crawler => {
const page = crawler.page();
var execTO = null;

console.log("[");

function exit(){
clearTimeout(execTO);
crawler.browser().close();
}

crawler.on("redirect", async function(e, crawler){
// console.log(crawler.redirect());
// console.log(e.params.url);
// utils.printCookies(crawler);
// utils.printRequest({type:'link',method:"GET",url:e.params.url});
// exit();
});
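// once the initial DOM is loaded, harvest links and forms from the rendered page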


crawler.on("domcontentloaded", async function(e, crawler){
//utils.printCookies(crawler);
await utils.printLinks("html", crawler.page())
await utils.printForms("html", crawler.page())

//await sleep(4000)
});

crawler.on("start", function(e, crawler){
//console.log("--->Start");
})


crawler.on("newdom", async function(e, crawler){
//console.log(e.params)
})

crawler.on("xhr", async function(e, crawler){
utils.printRequest(e.params.request)

//return false
});

crawler.on("xhrCompleted", function(e, crawler){
//console.log("XHR completed")
});


crawler.on("jsonp", function(e, crawler){
utils.printRequest(e.params.request)
});

crawler.on("jsonpCompleted", function(e, crawler){

});

crawler.on("websocket", function(e, crawler){
utils.printRequest(e.params.request)
});

crawler.on("websocketMessage", function(e, crawler){

});

crawler.on("websocketSend", function(e, crawler){

});

crawler.on("formSubmit", function(e, crawler){
utils.printRequest(e.params.request)
});

crawler.on("navigation", function(e, crawler){
e.params.request.type="link";
utils.printRequest(e.params.request)
});

crawler.on("eventtriggered", function(e, crawler){
//console.log(e.params)
});

crawler.on("triggerevent", function(e, crawler){
//console.log(e.params)
});

crawler.on("earlydetach", function(e, crawler){
//console.log('["warning","earlydetach of element ' + e.params.node + '],')
//crawler.browser().close();
});


async function end(){
if(!crawler.redirect()){
const el = await crawler.page().$("html");
const v = await el.getProperty('innerText');
const hash = await v.jsonValue();
var json = '["page_hash",' + JSON.stringify(hash) + '],';
console.log(json);

if(options.returnHtml){
json = '["html",' + JSON.stringify(hash) + '],';
console.log(json);
}
}

utils.printStatus(crawler);
exit();
}

crawler.on("end", end);

execTO = setTimeout(function(){ // (very dirty solution)
crawler.on("end", function(){});
crawler.errors().push(["probe_timeout", "maximum execution time reached"]);
end();
}, options.maxExecTime);


crawler.start()

})
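For context only (not part of this commit): the chrome-probe drives headless Chrome through Puppeteer's page and network events. Below is a minimal sketch of that underlying pattern; the target URL and launch options are illustrative, and the real probe adds DOM instrumentation, event triggering and the JSON output protocol shown above.

```javascript
const puppeteer = require("puppeteer");

(async () => {
    // headless by default; the new -l flag effectively flips this to headless: false
    const browser = await puppeteer.launch({ headless: true });
    const page = await browser.newPage();

    // report every request the page issues, roughly what the probe emits as crawl results
    page.on("request", req => {
        console.log(JSON.stringify({ method: req.method(), url: req.url(), type: req.resourceType() }));
    });

    await page.goto("http://example.com", { waitUntil: "networkidle2" });
    await browser.close();
})();
```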
5 changes: 5 additions & 0 deletions core/crawl/probe/chrome-probe/ckdeps.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
try{
require("puppeteer")
} catch(e) {
console.log("puppeteer")
}
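ckdeps.js establishes a simple convention: try to require each Node dependency and print the name of anything missing, one per line, presumably so the Python launcher (the new check_dependences() call in crawler.py) can collect the names from stdout and report them. A sketch of the same pattern generalized to a list of modules follows; the list and the argv handling are illustrative, not part of the commit.

```javascript
// same try/require/report pattern, extended to several modules
// (only "puppeteer" is checked by the committed ckdeps.js)
const modules = ["puppeteer"].concat(process.argv.slice(2));
modules.forEach(function(name){
    try {
        require(name);
    } catch (e) {
        // print the missing module name so the caller can collect it from stdout
        console.log(name);
    }
});
```

With no Puppeteer installed, running `node ckdeps.js` prints `puppeteer`, which the crawler can then surface as a dependency error before the crawl starts.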