Stable release v2.2.2 (Fixes #122)
s0md3v committed Apr 5, 2019
2 parents 25abb08 + 5d468cf commit dab63ee
Showing 5 changed files with 58 additions and 140 deletions.
3 changes: 1 addition & 2 deletions .travis.yml
@@ -2,7 +2,6 @@ language: python
os:
- linux
python:
- 2.7
- 3.6
install:
- pip install -r requirements.txt
@@ -14,4 +13,4 @@ before_script:
- flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
script:
- python photon.py -u "https://stackoverflow.com" -l 1 -d 1 -t 100 --regex "\d{10}" --dns --output="d3v"
- python photon.py -u "https://stackoverflow.com" -l 1 -t 10 --seeds="https://stackoverflow.com/jobs" --only-urls --export=json --ninja
- python photon.py -u "https://rocket.chat" -l 1 -t 10 --seeds="https://stackoverflow.com/jobs" --only-urls --export=json --wayback
52 changes: 7 additions & 45 deletions core/flash.py
@@ -1,55 +1,17 @@
from __future__ import print_function
import sys
import threading
import concurrent.futures

from core.colors import info

try:
import concurrent.futures
except ImportError:
pass


def threader(function, *urls):
"""Start multiple threads for a function."""
threads = []
# Because URLs is a tuple
urls = urls[0]
# Iterating over URLs
for url in urls:
task = threading.Thread(target=function, args=(url,))
threads.append(task)
# Start threads
for thread in threads:
thread.start()
# Wait for all threads to complete their work
for thread in threads:
thread.join()
# Delete threads
del threads[:]


def flash(function, links, thread_count):
"""Process the URLs and uses a threadpool to execute a function."""
# Convert links (set) to list
links = list(links)
if sys.version_info < (3, 2):
for begin in range(0, len(links), thread_count): # Range with step
end = begin + thread_count
splitted = links[begin:end]
threader(function, splitted)
progress = end
if progress > len(links): # Fix if overflow
progress = len(links)
print('\r%s Progress: %i/%i' % (info, progress, len(links)),
end='\r')
sys.stdout.flush()
else:
threadpool = concurrent.futures.ThreadPoolExecutor(
max_workers=thread_count)
futures = (threadpool.submit(function, link) for link in links)
for i, _ in enumerate(concurrent.futures.as_completed(futures)):
if i + 1 == len(links) or (i + 1) % thread_count == 0:
print('%s Progress: %i/%i' % (info, i + 1, len(links)),
end='\r')
print('')
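For reference, the Python 3 path that survives this cleanup boils down to the following self-contained sketch; the task function, the link set, and the info placeholder below are illustrative stand-ins, not part of the commit:

import concurrent.futures

info = '[~]'  # placeholder for core.colors.info

def flash(function, links, thread_count):
    """Run `function` over every link with a thread pool, printing progress."""
    links = list(links)
    threadpool = concurrent.futures.ThreadPoolExecutor(max_workers=thread_count)
    futures = (threadpool.submit(function, link) for link in links)
    for i, _ in enumerate(concurrent.futures.as_completed(futures)):
        # Refresh the progress line every `thread_count` completions and at the very end
        if i + 1 == len(links) or (i + 1) % thread_count == 0:
            print('%s Progress: %i/%i' % (info, i + 1, len(links)), end='\r')
    print('')

if __name__ == '__main__':
    flash(lambda url: None, {'https://example.com/%d' % n for n in range(10)}, 4)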
51 changes: 2 additions & 49 deletions core/requester.py
@@ -16,7 +16,6 @@ def requester(
headers=None,
timeout=10,
host=None,
ninja=False,
user_agents=None,
failed=None,
processed=None
@@ -32,7 +31,7 @@
# Pause/sleep the program for specified time
time.sleep(delay)

def normal(url):
def make_request(url):
"""Default request"""
final_headers = headers or {
'Host': host,
@@ -66,50 +65,4 @@ def normal(url):
response.close()
return 'dummy'

def facebook(url):
"""Interact with the developer.facebook.com API."""
return requests.get(
'https://developers.facebook.com/tools/debug/echo/?q=' + url,
verify=False
).text

def pixlr(url):
"""Interact with the pixlr.com API."""
if url == main_url:
# Because pixlr throws error if http://example.com is used
url = main_url + '/'
return requests.get(
'https://pixlr.com/proxy/?url=' + url,
headers={'Accept-Encoding': 'gzip'},
verify=False
).text

def code_beautify(url):
"""Interact with the codebeautify.org API."""
headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0',
'Accept': 'text/plain, */*; q=0.01',
'Accept-Encoding': 'gzip',
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'Origin': 'https://codebeautify.org',
'Connection': 'close',
}
return requests.post(
'https://codebeautify.com/URLService',
headers=headers,
data='path=' + url,
verify=False
).text

def photopea(url):
"""Interact with the www.photopea.com API."""
return requests.get(
'https://www.photopea.com/mirror.php?url=' + url, verify=False).text

if ninja: # If the ninja mode is enabled
# Select a random request function i.e. random API
response = random.choice(
[photopea, normal, facebook, pixlr, code_beautify])(url)
return response or 'dummy'
else:
return normal(url)
return make_request(url)
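With the third-party "ninja" proxies (facebook, pixlr, codebeautify, photopea) gone, every request now takes the one default path. Most of the function body is collapsed in this diff, so the following is only a rough sketch of the remaining flow; the parameter defaults, cookie handling, and status-code check are illustrative assumptions, and the real function also records failures and processed URLs:

import random
import time
import requests

def requester(url, main_url=None, delay=0, cook=None, headers=None, timeout=10,
              host=None, user_agents=('Photon',), failed=None, processed=None):
    """Fetch a URL and return its body, or the placeholder string 'dummy'."""
    time.sleep(delay)  # optional politeness delay between requests

    def make_request(url):
        final_headers = headers or {
            'Host': host,
            'User-Agent': random.choice(user_agents),
            'Cookie': cook or '',
            'Accept-Encoding': 'gzip',
        }
        response = requests.get(url, headers=final_headers,
                                timeout=timeout, verify=False)
        # Simplified: treat any 4xx/5xx as a throwaway response
        return response.text if response.status_code < 400 else 'dummy'

    return make_request(url)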
5 changes: 3 additions & 2 deletions core/utils.py
@@ -41,7 +41,8 @@ def is_link(url, processed, files):
is_file = url.endswith(BAD_TYPES)
if is_file:
files.add(url)
return is_file
return False
return True
return False
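The net effect is that is_link now answers "should this URL be crawled?" while still recording file URLs as a side effect, instead of returning True for files. A condensed sketch of the corrected logic, with a stand-in for the real BAD_TYPES tuple from core.config:

BAD_TYPES = ('.jpg', '.png', '.pdf', '.zip')  # stand-in for the tuple in core.config

def is_link(url, processed, files):
    """Return True only for fresh, non-file URLs worth crawling."""
    if url not in processed:
        if url.endswith(BAD_TYPES):
            files.add(url)   # remember the file for reporting
            return False     # ...but don't treat it as a crawlable link
        return True
    return False             # already crawled

files = set()
print(is_link('https://example.com/report.pdf', set(), files))  # False, and files now holds the URL
print(is_link('https://example.com/about', set(), files))       # True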


@@ -78,7 +79,7 @@ def writer(datasets, dataset_names, output_dir):
filepath = output_dir + '/' + dataset_name + '.txt'
with open(filepath, 'w+') as out_file:
joined = '\n'.join(dataset)
out_file.write(str(joined.encode('utf-8')))
out_file.write(str(joined.encode('utf-8').decode('utf-8')))
out_file.write('\n')

def timer(diff, processed):
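The writer change matters because in Python 3, calling str() on a bytes object embeds the b'...' repr in the output file rather than the text itself; a quick illustration:

joined = 'café'
print(str(joined.encode('utf-8')))                  # b'caf\xc3\xa9'  -- the bytes repr would end up in the file
print(str(joined.encode('utf-8').decode('utf-8')))  # café            -- the intended text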
87 changes: 45 additions & 42 deletions photon.py
@@ -6,13 +6,28 @@
import argparse
import os
import re
import requests
import sys
import time
import warnings

import requests

from core.colors import good, info, run, green, red, white, end

# Just a fancy ass banner
print('''%s ____ __ __
/ %s__%s \/ /_ ____ / /_____ ____
/ %s/_/%s / __ \/ %s__%s \/ __/ %s__%s \/ __ \\
/ ____/ / / / %s/_/%s / /_/ %s/_/%s / / / /
/_/ /_/ /_/\____/\__/\____/_/ /_/ %sv1.2.2%s\n''' %
(red, white, red, white, red, white, red, white, red, white, red, white,
red, white, end))

try:
from urllib.parse import urlparse # For Python 3
except ImportError:
print ('%s Photon runs only on Python 3.2 and above.' % info)
quit()

import core.config
from core.config import INTELS
from core.flash import flash
@@ -23,28 +38,6 @@
from core.utils import top_level, extract_headers, verb, is_link, entropy, regxy, remove_regex, timer, writer
from core.zap import zap

try:
from urllib.parse import urlparse # For Python 3
python2, python3 = False, True
except ImportError:
from urlparse import urlparse # For Python 2
python2, python3 = True, False


try:
input = raw_input
except NameError:
pass


# Just a fancy ass banner
print('''%s ____ __ __
/ %s__%s \/ /_ ____ / /_____ ____
/ %s/_/%s / __ \/ %s__%s \/ __/ %s__%s \/ __ \\
/ ____/ / / / %s/_/%s / /_/ %s/_/%s / / / /
/_/ /_/ /_/\____/\__/\____/_/ /_/ %sv1.2.1%s\n''' %
(red, white, red, white, red, white, red, white, red, white, red, white,
red, white, end))

# Disable SSL related warnings
warnings.filterwarnings('ignore')
@@ -82,8 +75,6 @@
action='store_true')
parser.add_argument('--dns', help='enumerate subdomains and DNS data',
dest='dns', action='store_true')
parser.add_argument('--ninja', help='ninja mode', dest='ninja',
action='store_true')
parser.add_argument('--keys', help='find secret keys', dest='api',
action='store_true')
parser.add_argument('--update', help='update photon', dest='update',
@@ -118,7 +109,6 @@
timeout = args.timeout or 6 # HTTP request timeout
cook = args.cook or None # Cookie
api = bool(args.api) # Extract high entropy strings i.e. API keys and stuff
ninja = bool(args.ninja) # Ninja mode toggle
crawl_level = args.level or 2 # Crawling level
thread_count = args.threads or 2 # Number of threads
only_urls = bool(args.only_urls) # Only URLs mode is off by default
@@ -135,12 +125,11 @@
# URLs that have get params in them e.g. example.com/page.php?id=2
fuzzable = set()
endpoints = set() # URLs found from javascript files
processed = set() # URLs that have been crawled
processed = set(['dummy']) # URLs that have been crawled
# URLs that belong to the target i.e. in-scope
internal = set(args.seeds)

everything = []
bad_intel = set() # Unclean intel urls
bad_scripts = set() # Unclean javascript file urls

core.config.verbose = verbose
@@ -180,13 +169,13 @@

supress_regex = False

def intel_extractor(response):
def intel_extractor(url, response):
"""Extract intel from the response body."""
matches = re.findall(r'([\w\.-]+s[\w\.-]+\.amazonaws\.com)|([\w\.-]+@[\w\.-]+\.[\.\w]+)', response)
if matches:
for match in matches:
verb('Intel', match)
bad_intel.add(match)
intel.add(url + ':' + ''.join(list(match)))
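Because the intel pattern has two alternative groups, re.findall yields a two-element tuple per hit, and ''.join(list(match)) flattens whichever group actually matched before the result is stored with the page it came from. Roughly, with a made-up response body:

import re

PATTERN = r'([\w\.-]+s[\w\.-]+\.amazonaws\.com)|([\w\.-]+@[\w\.-]+\.[\.\w]+)'
body = 'Contact admin@example.com or fetch assets.s3.amazonaws.com'
for match in re.findall(PATTERN, body):
    print(match, '->', ''.join(list(match)))
# ('', 'admin@example.com') -> admin@example.com
# ('assets.s3.amazonaws.com', '') -> assets.s3.amazonaws.com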


def js_extractor(response):
@@ -198,12 +187,22 @@ def js_extractor(response):
verb('JS file', match)
bad_scripts.add(match)

def remove_file(url):
if url.count('/') > 2:
replacable = re.search(r'/[^/]*?$', url).group()
if replacable != '/':
return url.replace(replacable, '')
else:
return url
else:
return url
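The new remove_file helper trims the last path segment so relative links can be joined onto the directory they were found in rather than onto main_url; for example (outputs shown as comments, URLs are illustrative):

import re

def remove_file(url):
    """Drop the trailing path segment of a URL, if there is one."""
    if url.count('/') > 2:
        replacable = re.search(r'/[^/]*?$', url).group()
        if replacable != '/':
            return url.replace(replacable, '')
    return url

print(remove_file('https://example.com/blog/post.html'))  # https://example.com/blog
print(remove_file('https://example.com/'))                # https://example.com/
print(remove_file('https://example.com'))                 # https://example.com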

def extractor(url):
"""Extract details from the response body."""
response = requester(url, main_url, delay, cook, headers, timeout, host, ninja, user_agents, failed, processed)
response = requester(url, main_url, delay, cook, headers, timeout, host, user_agents, failed, processed)
if clone:
mirror(url, response)
matches = re.findall(r'<[aA].*(href|HREF)=([^\s>]+)', response)
matches = re.findall(r'<[aA][^>]*?(href|HREF)=([^\s>]+)', response)
for link in matches:
# Remove everything after a "#" to deal with in-page anchors
link = link[1].replace('\'', '').replace('"', '').split('#')[0]
@@ -219,19 +218,25 @@ def extractor(url):
elif link[:2] == '//':
if link.split('/')[2].startswith(host):
verb('Internal page', link)
internal.add(schema + link)
internal.add(schema + '://' + link)
else:
verb('External page', link)
external.add(link)
elif link[:1] == '/':
verb('Internal page', link)
internal.add(main_url + link)
internal.add(remove_file(url) + link)
else:
verb('Internal page', link)
internal.add(main_url + '/' + link)
usable_url = remove_file(url)
if usable_url.endswith('/'):
internal.add(usable_url + link)
elif link.startswith('/'):
internal.add(usable_url + link)
else:
internal.add(usable_url + '/' + link)

if not only_urls:
intel_extractor(response)
intel_extractor(url, response)
js_extractor(response)
if args.regex and not supress_regex:
regxy(args.regex, response, supress_regex, custom)
@@ -245,7 +250,7 @@ def extractor(url):

def jscanner(url):
"""Extract endpoints from JavaScript code."""
response = requester(url, main_url, delay, cook, headers, timeout, host, ninja, user_agents, failed, processed)
response = requester(url, main_url, delay, cook, headers, timeout, host, user_agents, failed, processed)
# Extract URLs/endpoints
matches = re.findall(r'[\'"](/.*?)[\'"]|[\'"](http.*?)[\'"]', response)
# Iterate over the matches, match is a tuple
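The endpoint regex above likewise yields two-element tuples, one slot for root-relative paths and one for absolute URLs; a small illustration with a made-up snippet of JavaScript:

import re

js = 'fetch("/api/v1/users"); var cdn = "https://cdn.example.com/app.js";'
for match in re.findall(r'[\'"](/.*?)[\'"]|[\'"](http.*?)[\'"]', js):
    print(match)
# ('/api/v1/users', '')
# ('', 'https://cdn.example.com/app.js')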
@@ -301,10 +306,8 @@ def jscanner(url):
if '=' in url:
fuzzable.add(url)

for match in bad_intel:
for x in match: # Because "match" is a tuple
if x != '': # If the value isn't empty
intel.add(x)
for match in intel:
intel.add(match)
for url in external:
try:
if top_level(url, fix_protocol=True) in INTELS:
