#!/usr/bin/env ruby
=begin

.$$$     $.                                   .$$$     $.         
$$$$     $$. .$$$  $$$ .$$$$$$.  .$$$$$$$$$$. $$$$     $$. .$$$$$$$. .$$$$$$. 
$ $$     $$$ $ $$  $$$ $ $$$$$$. $$$$$ $$$$$$ $ $$     $$$ $ $$   $$ $ $$$$$$.
$ `$     $$$ $ `$  $$$ $ `$  $$$ $$' $ `$ `$$ $ `$     $$$ $ `$      $ `$  $$$'
$. $     $$$ $. $$$$$$ $. $$$$$$ `$  $. $  :' $. $     $$$ $. $$$$   $. $$$$$.
$::$  .  $$$ $::$  $$$ $::$  $$$     $::$     $::$  .  $$$ $::$      $::$  $$$$
$;;$ $$$ $$$ $;;$  $$$ $;;$  $$$     $;;$     $;;$ $$$ $$$ $;;$      $;;$  $$$$
$$$$$$ $$$$$ $$$$  $$$ $$$$  $$$     $$$$     $$$$$$ $$$$$ $$$$$$$$$ $$$$$$$$$'


WhatWeb - Next generation web scanner.
Author: Andrew Horton aka urbanadventurer
 
Homepage: http://www.morningstarsecurity.com/research/whatweb

Copyright 2009-2016 Andrew Horton <andrew at morningstarsecurity dot com>

This file is part of WhatWeb.

WhatWeb is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.

WhatWeb is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with WhatWeb.  If not, see <http://www.gnu.org/licenses/>.
=end


#require 'profile' 
require 'getoptlong'
require 'pp'
require 'net/http'
require 'open-uri'
require 'cgi'
require 'thread'
require 'tempfile'
require 'rbconfig'  # detect environment, e.g. windows or linux
require 'resolv'
require 'resolv-replace' # asynchronous DNS
require 'open-uri'



## set up load paths - must be before loading lib/ files
# Put the directory holding this script at the front of the load path, unless
# it is already listed in either its raw or its expanded form.
whatweb_dir = File.dirname(__FILE__)
whatweb_dir_expanded = File.expand_path(whatweb_dir)
unless $:.include?(whatweb_dir) || $LOAD_PATH.include?(whatweb_dir_expanded)
	$LOAD_PATH.unshift(whatweb_dir_expanded)
end
# system-wide install location
$LOAD_PATH << "/usr/share/whatweb/"

# When the script is started through a symlink, also search the directory of
# the fully resolved file (realpath follows *every* symlink in the chain).
if File.symlink?(__FILE__)
	require 'pathname'
	real_script = Pathname.new(__FILE__).realpath
	$LOAD_PATH << File.dirname( real_script )
end


# Ruby Version Compatibility
# Load the Net::HTTP extension matching the running interpreter, or refuse to
# start. Branch order matters only in that 1.9 and 2.x are checked first.
# NOTE(review): any version that is not 1.8, 1.9 or 2.x (e.g. 3.x) falls into
# the final branch and exits - confirm whether that is still intended.
case RUBY_VERSION
when /^1\.9/
	require 'digest/md5'
	require 'lib/extend-http_ruby1.9.rb'
when /^2\./
	require 'digest/md5'
	require 'lib/extend-http_ruby2.rb'
when /^1\.8/
	puts "Sorry but Ruby 1.8 is not supported. WhatWeb requires Ruby 1.9 or later."
	exit 1
else
	puts "Unsupported version of Ruby"
	exit 1
end


### gem detection & loading
# Report whether the gem +gemname+ is installed.
#
# Uses Gem::Specification.find_by_name when this RubyGems provides it and
# falls back to the legacy Gem.available? lookup otherwise.
def gem_available?(gemname)
	has_find_by_name = defined?(Gem::Specification) && defined?(Gem::Specification.find_by_name)
	return gem_available_new_rubygems?(gemname) if has_find_by_name

	gem_available_old_rubygems?(gemname)
end

# Needed by Ruby 1.8.7 (2010-08-16 patchlevel 302) used as stable in Debian in Aug2012.
# Legacy lookup: delegates straight to Gem.available?.
# Only reached from gem_available? when Gem::Specification.find_by_name is
# not defined.
def gem_available_old_rubygems?(gemname)
	Gem.available?(gemname)
end

# Look the gem up through Gem::Specification.find_by_name.
#
# Returns true when a specification is found and false when the lookup
# raises LoadError.
def gem_available_new_rubygems?(gemname)
	spec = Gem::Specification.find_by_name(gemname)
	spec ? true : nil
rescue LoadError
	false
end

# Optional gems: require each one that is installed and carry on without the
# rest. A LoadError is only re-raised when $WWDEBUG is already true.
gems = %w|json mongo rchardet |

gems.each do |thisgem|
	begin
		require 'rubygems' # rubygems is optional
		require thisgem if gem_available?(thisgem)
	rescue LoadError
		# that failed.. no big deal
		raise if $WWDEBUG==true
	end
end

require 'lib/target.rb'
require 'lib/plugins.rb'
require 'lib/output.rb'
require 'lib/colour.rb'
require 'lib/tld.rb'
require 'lib/version_class.rb'
require 'lib/http-status.rb'
HTTP_Status.initialize

# Look through $LOAD_PATH for the plugin directories "plugins" and
# "my-plugins". They could be in the same dir as whatweb, in
# /usr/share/whatweb, etc. The result lists every existing "plugins" directory
# first, then every existing "my-plugins" directory, each in $LOAD_PATH order.
# File.exist? is used because File.exists? is deprecated and no longer
# available on Ruby 3.2+.
PLUGIN_DIRS = [ "plugins", "my-plugins" ].map { |subdir|
	$LOAD_PATH.map { |base| base + "/" + subdir }.select { |candidate| File.exist?(candidate) }
}.flatten

# nothing says pro-developer like using global variables
# Process-wide defaults. Most of them are overwritten by the command-line
# option parser further down in this file.
$VERSION = "0.4.8-dev"
$WWDEBUG = false # raise exceptions in plugins, etc
$verbose = 0 # $VERBOSE is reserved in ruby
# "auto", "always" or false (see the --colour option)
$use_colour = "auto"
$USER_AGENT = "WhatWeb/#{$VERSION}"
# upper bound checked by the main loop before it takes the next target
$MAX_THREADS = 25
# the option parser accepts 1, 3 or 4
$AGGRESSION = 1
$FOLLOW_REDIRECT = "always"
# the main loop stops following a redirect chain at this count
$MAX_REDIRECTS = 10
$USE_PROXY = false
$PROXY_HOST = nil
$PROXY_PORT = 8080
$PROXY_USER = nil
$PROXY_PASS = nil
# prefix, suffix and pattern are applied to targets in make_target_list
$URL_PREFIX = ""
$URL_SUFFIX = ""
$URL_PATTERN = nil
# not referenced anywhere else in this file
$NO_THREADS = false
$HTTP_OPEN_TIMEOUT = 15
$HTTP_READ_TIMEOUT = 30
# seconds each worker thread sleeps before scanning its target (nil = none)
$WAIT = nil
# optional error log; error() calls its #out method when it is set
$OUTPUT_ERRORS = nil
$QUIET = false
$CUSTOM_HEADERS = {}
$BASIC_AUTH_USER = nil
$BASIC_AUTH_PASS = nil
# only used by the commented-out profiling code in this file
$PLUGIN_TIMES = Hash.new(0)
# when true, error() returns without printing anything
$NO_ERRORS = false


### matching

# fuzzy matching ftw
# Build a comma-separated, lower-cased list of the tag names found in +b+.
#
# Everything that follows a "<" up to the next whitespace or ">" counts as a
# tag name. Names that appear between a "script" tag and the next "/script"
# tag are left out; the "script" and "/script" names themselves are kept.
# !--, --> and noscript get no special treatment.
def make_tag_pattern(b)
	in_script = false
	kept = []

	b.scan(/<([^\s>]*)/).flatten.each do |raw|
		tag = raw.downcase
		if tag == "/script"
			# a closing tag always ends the script section and is reported
			in_script = false
			kept << tag
		else
			kept << tag unless in_script
			in_script = true if tag == "script"
		end
	end

	kept.join(",")
end

# Return a copy of +s+ with the five predefined entities (&quot; &apos;
# &amp; &lt; &gt;) replaced by the characters they stand for.
#
# The string is decoded in a single pass, so the output of one replacement is
# never decoded again: "&amp;lt;" becomes "&lt;", not "<". The argument itself
# is not modified.
def decode_html_entities(s)
	html_entities = { "&quot;"=>'"', "&apos;"=>"'", "&amp;"=>"&", "&lt;"=>"<", "&gt;"=>">" }
	s.gsub(/&(?:quot|apos|amp|lt|gt);/) { |entity| html_entities[entity] }
end

# Translate a certainty percentage into a word:
# 0..49 => "maybe", 50..99 => "probably", 100 => "certain".
# Any other value yields nil.
def certainty_to_words(p)
	if (0..49) === p
		"maybe"
	elsif (50..99) === p
		"probably"
	elsif 100 === p
		"certain"
	end
end

# some plugins want a random string in URLs
# Random base-36 string (digits and lower-case letters, at most 8 characters)
# for plugins that need a random token in a URL.
def randstr
	upper_bound = 36 ** 8
	rand(upper_bound).to_s(36)
end


# Evaluate a Google-dork style query (+ghdb+) against a fetched page.
#
# Supported operators:
#   intitle:word / intitle:"some words"  - text must occur inside <title>
#   filetype:ext                         - the URL path must end in ext
#                                          (skipped when base_uri is nil or
#                                          its path is empty)
#   inurl:word / inurl:"some words"      - text must occur in the URL
#   word, +word, "quoted words"          - must occur in the body
#   -word                                - must NOT occur in the body
# All comparisons are case-insensitive. An operator may occur several times;
# every occurrence has to be satisfied.
#
# ghdb     - the query String
# body     - the page body
# meta     - unused here
# status   - unused here
# base_uri - URI of the page (needs #path and #to_s), or nil
#
# Returns true only if at least one condition was evaluated and all of them
# hold, otherwise false.
def match_ghdb(ghdb, body, meta, status, base_uri)
	# this could be made faster by creating code to eval once for each plugin

	pp "match_ghdb", ghdb if $verbose > 2

	matches = [] # fill with true or false. succeeds if all true
	s = ghdb

	# does it contain intitle?
	if s =~ /intitle:/i
		# either the next word or the following words enclosed in "s.
		# The scan results are flattened into plain strings; calling to_s on
		# the nested array would yield its inspect form and never match.
		intitle = ( s.scan(/intitle:"([^"]*)"/i) + s.scan(/intitle:([^"]\w+)/i) ).flatten
		intitle.each do |x|
			matches << (( body =~ /<title>[^<]*#{Regexp.escape(x)}[^<]*<\/title>/i ).nil? ? false : true)
		end
		# strip out the intitle: part
		s = s.gsub(/intitle:"([^"]*)"/i, '').gsub(/intitle:([^"]\w+)/i, '')
	end

	if s =~ /filetype:/i
		filetype = ( s.scan(/filetype:"([^"]*)"/i) + s.scan(/filetype:([^"]\w+)/i) ).flatten
		# lame method: check if the URL ends in the filetype
		path = base_uri.nil? ? nil : base_uri.path.split("?")[0]
		unless path.nil?
			filetype.each do |x|
				matches << (( path =~ /#{Regexp.escape(x)}$/i ).nil? ? false : true)
			end
		end
		s = s.gsub(/filetype:"([^"]*)"/i, '').gsub(/filetype:([^"]\w+)/i, '')
	end

	if s =~ /inurl:/i
		inurl = ( s.scan(/inurl:"([^"]*)"/i) + s.scan(/inurl:([^"]\w+)/i) ).flatten
		# can occur multiple times.
		inurl.each { |x| matches << (( base_uri.to_s =~ /#{Regexp.escape(x)}/i ).nil? ? false : true) }
		# strip out the inurl: part
		s = s.gsub(/inurl:"([^"]*)"/i, '').gsub(/inurl:([^"]\w+)/i, '')
	end

	# split the remaining words except those enclosed in quotes, remove the quotes and sort them
	remaining_words = s.scan(/([^ "]+)|("[^"]+")/i).flatten.compact.each { |w| w.delete!('"') }.sort.uniq

	pp "Remaining GHDB words", remaining_words if $verbose > 2

	remaining_words.each do |w|
		if w[0..0] == '-'
			# a leading - reverses the test: the word must be absent
			matches << (( body =~ /#{Regexp.escape(w[1..-1])}/i ).nil? ? true : false)
		else
			w = w[1..-1] if w[0..0] == '+' # if it starts with +, ignore the 1st char
			matches << (( body =~ /#{Regexp.escape(w)}/i ).nil? ? false : true)
		end
	end

	pp matches if $verbose > 2

	# true only if something was tested and every test passed
	!matches.empty? && matches.all?
end


### targets

# Build the list of targets to scan.
#
# cmdline_args - Array of targets given on the command line (not modified)
# inputfile    - optional path of a file with one target per line; blank
#                lines and lines starting with # are skipped. A path that
#                does not exist is silently ignored.
# pluginlist   - unused, kept for interface compatibility
#
# Processing steps:
# * Entries made up only of digits and . - * / that are not a plain dotted
#   number are treated as nmap-style ranges and expanded by running
#   `nmap -n -sL`; the resulting addresses are appended after all other
#   targets. The entry is passed to the shell inside single quotes (it cannot
#   contain a quote itself) so that * is not glob-expanded.
# * Entries naming an existing local file are kept unchanged.
# * Every other entry is run through $URL_PATTERN (replacing %insert%),
#   wrapped in $URL_PREFIX / $URL_SUFFIX and gets "http://" prepended when it
#   has no scheme.
#
# Returns an Array of target Strings.
def make_target_list( cmdline_args, inputfile=nil, pluginlist = nil )
	url_list = cmdline_args

	# read each line as a url, skipping blank lines and lines that begin with a #
	if !inputfile.nil? and File.exist?(inputfile)
		pp "loading input file: #{inputfile}" if $verbose > 2
		# File.readlines closes the file again, unlike File.open(...).readlines
		file_targets = File.readlines(inputfile).map { |line| line.strip }
		url_list += file_targets.reject { |line| line.empty? or line =~ /^#/ }
	end

	# \A..\z anchor the whole string, so nothing can ride along after a newline
	is_range = lambda { |x| x =~ /\A[0-9\.\-*\/]+\z/ && !(x =~ /\A[\d\.]+\z/) }
	ranges, singles = url_list.partition { |x| is_range.call(x) }

	genrange = ranges.map do |x|
		# check for nmap
		error "Target ranges require nmap to be in the path" if `which nmap` == ""
		`nmap -n -sL '#{x}' 2>&1 | egrep -o "([0-9]{1,3}\\.){3}[0-9]{1,3}"`.split("\n")
	end.flatten

	url_list = singles + genrange

	# make urls friendlier: keep local files as they are, otherwise apply the
	# pattern/prefix/suffix and assume http:// when no scheme (http, https,
	# ftp, etc) is given
	url_list = url_list.map do |x|
		if File.exist?(x)
			x
		else
			# use url pattern
			x = $URL_PATTERN.gsub('%insert%',x) if $URL_PATTERN
			# add prefix & suffix
			x = $URL_PREFIX + x + $URL_SUFFIX

			x =~ /^[a-z]+:\/\// ? x : "http://" + x
		end
	end

	url_list.flatten #.sort.uniq
end

# Take the next target off the global $targets queue.
#
# Targets already present in $recent_targets are skipped. While the queue is
# empty but other threads are still alive, the method keeps yielding with
# Thread.pass and checking $targets again. It gives up once the queue is empty
# and only the calling thread is left.
#
# The returned value is appended to $recent_targets, which is capped at 100
# entries by dropping the OLDEST one (the previous pop removed the entry that
# had just been pushed, so the window never moved past its first 100 targets).
#
# Returns the next target, or nil when there is nothing left to scan.
def next_target
	t = nil

	puts "Target List:" + $targets.inspect if $verbose > 2

	while $recent_targets.include?(t) or t.nil?
		t = $targets.shift
		puts "# t at the end of the $targets list" if $verbose > 2
		# t at the end of the $targets list
		next unless t.nil?

		puts "t is nil" if $verbose > 2
		next unless $targets.empty?

		if Thread.list.size > 1
			if $verbose > 2
				puts "Thread list size: #{Thread.list.size}"
				Thread.list.each do |thread|
					puts "Thread: #{thread.inspect} is #{thread.status}"
				end
			end
			#sleep 1
			Thread.pass
		else
			puts "breaking now" if $verbose > 2
			break
		end
	end

	puts "Recent Targets:" + $recent_targets.join(",") if $verbose > 2

	$recent_targets.push t
	$recent_targets.shift if $recent_targets.size > 100 # we dont need to care about more than 100

	t
end

# backwards compatible convenience method for plugins to use
# Backwards compatible convenience method for plugins: open +url+ through a
# fresh Target and return
# [status, uri, ip, body, headers, raw_headers] as reported by that Target.
def open_target( url )
	target = Target.new(url)
	target.open
	[:status, :uri, :ip, :body, :headers, :raw_headers].map { |field| target.public_send(field) }
end

### output

# Print an error message to STDERR (in red unless colour is disabled) and pass
# it to the $OUTPUT_ERRORS logger when one is configured. Does nothing when
# $NO_ERRORS is set.
#
# If the global output mutex $semaphore exists, it is held while writing. The
# mutex is released in an ensure clause, so a failure while writing (for
# example in the error logger) can no longer leave it locked and stall every
# other thread.
#
# NOTE: as before, the mutex is also released when the calling thread already
# held it on entry.
#
# s - the message (anything puts can print)
def error( s )
	return if $NO_ERRORS

	mutex = defined?( $semaphore ) ? $semaphore : nil
	if mutex
		# We want the output mutex locked.
		# Has our current thread already locked the Mutex?
		begin
			mutex.lock
		rescue ThreadError
			# we're already locked. This was expected.
		end
	end

	begin
		if ( $use_colour=="auto" ) or ( $use_colour=="always" )
			STDERR.puts red( s )
		else
			STDERR.puts s
		end
		STDERR.flush
		$OUTPUT_ERRORS.out( s ) unless $OUTPUT_ERRORS.nil?
	ensure
		mutex.unlock if mutex
	end
end

# takes a string and returns an array of lines. used by plugin_info
# Break the text +s+ into lines for display (used by plugin_info).
#
# Words are separated on whitespace. Each word appended to a line is followed
# by a single space, and that space counts towards +width+. A word longer
# than +width+ is cut into +width+-sized pieces, each on its own line.
# Empty lines are never emitted.
#
# s     - the String to wrap
# width - maximum line width, must be at least 1 (default 10)
#
# Returns an Array of Strings.
# Raises ArgumentError if width is smaller than 1 (this used to loop forever).
def word_wrap( s,width=10 )
	width = width.to_i
	raise ArgumentError, "width must be at least 1" if width < 1

	ret = []
	line = ""
	s.split.each do |word|
		if line.size + word.size + 1 <= width
			line += word + " "
			next
		end

		# the word does not fit: finish the current line, if there is one
		ret << line unless line.empty?

		if word.size > width
			# too long for any line: emit it in width-sized pieces
			line = ""
			(0...word.size).step(width) { |offset| ret << word[offset, width] }
		else
			line = word + " "
		end
	end
	ret << line unless line.empty?

	ret
end

### core

# Run every plugin in $plugins_to_use against +target+.
#
# Each plugin is used by one caller at a time: the method yields with
# Thread.pass until plugin.locked? is false, locks the plugin, calls
# init(target) and x, and unlocks it again. A failing plugin is reported via
# error(), unlocked, and - unless $WWDEBUG is true - skipped.
#
# Returns an Array of [plugin_name, result] pairs for the plugins whose
# result was neither nil nor empty.
def run_plugins(target)
	$plugins_to_use.each_with_object([]) do |(plugin_name, plugin), collected|
		outcome = nil
		begin
			while plugin.locked?
				#sleep 0.1
				puts "Waiting for plugin:#{plugin_name} to unlock" if $verbose > 2
				Thread.pass
			end
			plugin.lock

			plugin.init(target)

			# eXecute the plugin
			outcome = plugin.x

			plugin.unlock
		rescue Exception => err
			error("ERROR: Plugin #{plugin_name} failed for #{target.to_s}. #{err}")
			plugin.unlock
			raise if $WWDEBUG == true
		end
		collected << [plugin_name, outcome] unless outcome.nil? or outcome.empty?
	end
end




# Print the complete usage help (banner, every option group and examples) to
# standard output. The text interpolates $VERSION, $FOLLOW_REDIRECT,
# $PROXY_PORT, $MAX_THREADS, $HTTP_OPEN_TIMEOUT and $HTTP_READ_TIMEOUT. After
# the help, a hint is printed for each optional gem (json, mongo, rchardet)
# that gem_available? reports as missing.
def usage_full()
puts"
.$$$     $.                                   .$$$     $.         
$$$$     $$. .$$$  $$$ .$$$$$$.  .$$$$$$$$$$. $$$$     $$. .$$$$$$$. .$$$$$$. 
$ $$     $$$ $ $$  $$$ $ $$$$$$. $$$$$ $$$$$$ $ $$     $$$ $ $$   $$ $ $$$$$$.
$ `$     $$$ $ `$  $$$ $ `$  $$$ $$' $ `$ `$$ $ `$     $$$ $ `$      $ `$  $$$'
$. $     $$$ $. $$$$$$ $. $$$$$$ `$  $. $  :' $. $     $$$ $. $$$$   $. $$$$$.
$::$  .  $$$ $::$  $$$ $::$  $$$     $::$     $::$  .  $$$ $::$      $::$  $$$$
$;;$ $$$ $$$ $;;$  $$$ $;;$  $$$     $;;$     $;;$ $$$ $$$ $;;$      $;;$  $$$$
$$$$$$ $$$$$ $$$$  $$$ $$$$  $$$     $$$$     $$$$$$ $$$$$ $$$$$$$$$ $$$$$$$$$'

"

puts "WhatWeb - Next generation web scanner version #{$VERSION}.\nDeveloped by Andrew Horton aka urbanadventurer and Brendan Coles."
puts "Homepage: http://www.morningstarsecurity.com/research/whatweb"
puts
puts "Usage: whatweb [options] <URLs>"
puts "
TARGET SELECTION:
  <TARGETs>\t\t\tEnter URLs, hostnames, IP addresses, 
  \t\t\t\tfilenames, or nmap-format IP address ranges.
  --input-file=FILE, -i\t\tRead targets from a file. You can pipe
\t\t\t\thostnames or URLs directly with -i /dev/stdin.

TARGET MODIFICATION:
  --url-prefix\t\t\tAdd a prefix to target URLs.
  --url-suffix\t\t\tAdd a suffix to target URLs.
  --url-pattern\t\t\tInsert the targets into a URL.
\t\t\t\te.g. example.com/%insert%/robots.txt

AGGRESSION:
The aggression level controls the trade-off between speed/stealth and
reliability.
  --aggression, -a=LEVEL\tSet the aggression level. Default: 1.
  1. Stealthy\t\t\tMakes one HTTP request per target and also 
  \t\t\t\tfollows redirects.
  3. Aggressive\t\t\tIf a level 1 plugin is matched, additional
  \t\t\t\trequests will be made.
  4. Heavy\t\t\tMakes a lot of HTTP requests per target. URLs 
  \t\t\t\tfrom all plugins are attempted.

HTTP OPTIONS:
  --user-agent, -U=AGENT\tIdentify as AGENT instead of WhatWeb/#{$VERSION}.
  --header, -H\t\t\tAdd an HTTP header. eg \"Foo:Bar\". Specifying a 
\t\t\t\tdefault header will replace it. Specifying an 
\t\t\t\tempty value, e.g. \"User-Agent:\" will remove it.
  --follow-redirect=WHEN\tControl when to follow redirects. WHEN may be
\t\t\t\t`never', `http-only', `meta-only', `same-site',
\t\t\t\t`same-domain' or `always'. Default: #{$FOLLOW_REDIRECT}.
  --max-redirects=NUM\t\tMaximum number of redirects. Default: 10.

AUTHENTICATION:
  --user, -u=<user:password>\tHTTP basic authentication.
  --cookie, -c=COOKIES\t\tUse cookies, e.g. 'name=value; name2=value2'.

PROXY:
  --proxy\t\t\t<hostname[:port]> Set proxy hostname and port.
\t\t\t\tDefault: #{$PROXY_PORT}.
  --proxy-user\t\t\t<username:password> Set proxy user and password.

PLUGINS:
  --list-plugins, -l\t\tList all plugins.
  --info-plugins, -I=[SEARCH]\tList all plugins with detailed information.
\t\t\t\tOptionally search with keywords in a comma
\t\t\t\tdelimited list.
  --search-plugins=STRING\tSearch plugins for a keyword.
  --plugins, -p=LIST\t\tSelect plugins. LIST is a comma delimited set 
\t\t\t\tof selected plugins. Default is all.
\t\t\t\tEach element can be a directory, file or plugin 
\t\t\t\tname and can optionally have a modifier, +/-.
\t\t\t\tExamples: +/tmp/moo.rb,+/tmp/foo.rb
\t\t\t\ttitle,md5,+./plugins-disabled/
\t\t\t\t./plugins-disabled,-md5
\t\t\t\t-p + is a shortcut for -p +plugins-disabled.

  --grep, -g=STRING\t\tSearch for STRING in HTTP responses. Reports 
\t\t\t\twith a plugin named Grep.
  --custom-plugin=DEFINITION\tDefine a custom plugin named Custom-Plugin,
\t\t\t\tExamples: \":text=>'powered by abc'\"
\t\t\t\t\":version=>/powered[ ]?by ab[0-9]/\"
\t\t\t\t\":ghdb=>'intitle:abc \\\"powered by abc\\\"'\"
\t\t\t\t\":md5=>'8666257030b94d3bdb46e05945f60b42'\"
\t\t\t\t\"{:text=>'powered by abc'}\"
  --dorks=PLUGIN\t\tList Google dorks for the selected plugin.

OUTPUT:
  --verbose, -v\t\t\tVerbose output includes plugin descriptions. 
\t\t\t\tUse twice for debugging.
  --colour,--color=WHEN\t\tcontrol whether colour is used. WHEN may be 
\t\t\t\t`never', `always', or `auto'.
  --quiet, -q\t\t\tDo not display brief logging to STDOUT.
  --no-errors\t\t\tSuppress error messages.

LOGGING:
  --log-brief=FILE\t\tLog brief, one-line output.
  --log-verbose=FILE\t\tLog verbose output.
  --log-errors=FILE\t\tLog errors.
  --log-xml=FILE\t\tLog XML format.
  --log-json=FILE\t\tLog JSON format.
  --log-sql=FILE\t\tLog SQL INSERT statements.
  --log-sql-create=FILE\t\tCreate SQL database tables.
  --log-json-verbose=FILE\tLog JSON Verbose format.
  --log-magictree=FILE\t\tLog MagicTree XML format.
  --log-object=FILE\t\tLog Ruby object inspection format.
  --log-mongo-database\t\tName of the MongoDB database.
  --log-mongo-collection\tName of the MongoDB collection.
\t\t\t\tDefault: whatweb.
  --log-mongo-host\t\tMongoDB hostname or IP address.
\t\t\t\tDefault: 0.0.0.0.
  --log-mongo-username\t\tMongoDB username. Default: nil.
  --log-mongo-password\t\tMongoDB password. Default: nil.
  
PERFORMANCE & STABILITY:
  --max-threads, -t\t\tNumber of simultaneous threads. Default: #{$MAX_THREADS}.
  --open-timeout\t\tTime in seconds. Default: #{$HTTP_OPEN_TIMEOUT}.
  --read-timeout\t\tTime in seconds. Default: #{$HTTP_READ_TIMEOUT}.
  --wait=SECONDS\t\tWait SECONDS between connections.
\t\t\t\tThis is useful when using a single thread.

HELP & MISCELLANEOUS:
  --short-help\t\t\tShort usage help.
  --help, -h\t\t\tComplete usage help.
  --debug\t\t\tRaise errors in plugins.
  --version\t\t\tDisplay version information.

EXAMPLE USAGE:
* Scan example.com.
  ./whatweb example.com
* Scan reddit.com slashdot.org with verbose plugin descriptions.
  ./whatweb -v reddit.com slashdot.org
* An aggressive scan of wired.com detects the exact version of WordPress.
  ./whatweb -a 3 www.wired.com
* Scan the local network quickly and suppress errors.
  whatweb --no-errors 192.168.0.0/24
* Scan the local network for https websites.
  whatweb --no-errors --url-prefix https:// 192.168.0.0/24
* Scan for crossdomain policies in the Alexa Top 1000.
  ./whatweb -i plugin-development/alexa-top-100.txt \\
  --url-suffix /crossdomain.xml -p crossdomain_xml\n"

	# list the optional gems that are not installed
	suggestions=""
	suggestions << "To enable JSON logging install the json gem.\n" unless gem_available?('json')
	suggestions << "To enable MongoDB logging install the mongo gem.\n" unless gem_available?('mongo')
	suggestions << "To enable character set detection and MongoDB logging install the rchardet gem.\n" unless gem_available?('rchardet')

	unless suggestions.empty?
		print "\nOPTIONAL DEPENDENCIES\n--------------------------------------------------------------------------------\n" + suggestions + "\n"
	end

	puts
end

# Print the abbreviated usage help (banner, the most common options and
# examples) to standard output. The text interpolates $VERSION.
def usage_short()
puts"
.$$$     $.                                   .$$$     $.         
$$$$     $$. .$$$  $$$ .$$$$$$.  .$$$$$$$$$$. $$$$     $$. .$$$$$$$. .$$$$$$. 
$ $$     $$$ $ $$  $$$ $ $$$$$$. $$$$$ $$$$$$ $ $$     $$$ $ $$   $$ $ $$$$$$.
$ `$     $$$ $ `$  $$$ $ `$  $$$ $$' $ `$ `$$ $ `$     $$$ $ `$      $ `$  $$$'
$. $     $$$ $. $$$$$$ $. $$$$$$ `$  $. $  :' $. $     $$$ $. $$$$   $. $$$$$.
$::$  .  $$$ $::$  $$$ $::$  $$$     $::$     $::$  .  $$$ $::$      $::$  $$$$
$;;$ $$$ $$$ $;;$  $$$ $;;$  $$$     $;;$     $;;$ $$$ $$$ $;;$      $;;$  $$$$
$$$$$$ $$$$$ $$$$  $$$ $$$$  $$$     $$$$     $$$$$$ $$$$$ $$$$$$$$$ $$$$$$$$$'

"

puts "WhatWeb - Next generation web scanner version #{$VERSION}.\nDeveloped by Andrew Horton aka urbanadventurer and Brendan Coles."
puts "Homepage: http://www.morningstarsecurity.com/research/whatweb"
puts
puts "Usage: whatweb [options] <URLs>"
puts "
TARGET SELECTION:
  <TARGETs>\t\t\tEnter URLs, hostnames, IP addresses, or 
  \t\t\t\tnmap-format IP ranges.
  --input-file=FILE, -i\t\tRead targets from a file.

AGGRESSION:
  --aggression, -a=LEVEL\tSet the aggression level. Default: 1.
  1. Stealthy\t\t\tMakes one HTTP request per target and also 
  \t\t\t\tfollows redirects.
  3. Aggressive\t\t\tIf a level 1 plugin is matched, additional
  \t\t\t\trequests will be made.

PLUGINS:
  --list-plugins, -l\t\tList all plugins.
  --info-plugins, -I=[SEARCH]\tList all plugins with detailed information.
\t\t\t\tOptionally search with a keyword.
  --search-plugins=STRING\tSearch plugins for a keyword.
  --grep, -g=STRING\t\tSearch for STRING in HTTP responses. Reports
\t\t\t\twith a plugin named Grep.
OUTPUT:
  --verbose, -v\t\t\tVerbose output includes plugin descriptions. 
\t\t\t\tUse twice for debugging.
  --colour,--color=WHEN\t\tcontrol whether colour is used. WHEN may be 
\t\t\t\t`never', `always', or `auto'.
 
HELP & MISCELLANEOUS:
  --short-help\t\t\tThis short usage help.
  --help, -h\t\t\tComplete usage help.

EXAMPLE USAGE:
* Scan example.com.
  ./whatweb example.com
* Scan reddit.com slashdot.org with verbose plugin descriptions.
  ./whatweb -v reddit.com slashdot.org
* An aggressive scan of wired.com detects the exact version of WordPress.
  ./whatweb -a 3 www.wired.com
* Scan the local network quickly and suppress errors.
  whatweb --no-errors 192.168.0.0/24
* Scan the local network for HTTPS websites.
  whatweb --no-errors --url-prefix https:// 192.168.0.0/24
* Scan for crossdomain policies in the Alexa Top 1000.
  ./whatweb -i plugin-development/alexa-top-100.txt \\
  --url-suffix /crossdomain.xml -p crossdomain_xml

  Note: This is the short usage help. 
  For the complete usage help use -h or --help.\n"

puts

end


# With no arguments at all, print the short help and stop right away, before
# any option parsing or plugin loading takes place.
if ARGV.size==0 # faster usage info
	usage_short
	exit 
end

# State filled in by the option parser below and consumed after parsing.
# plugin_selection: the raw -p/--plugins argument (nil when not given)
plugin_selection=nil
# set to true when --custom-plugin / --grep defined a plugin successfully
use_custom_plugin=false
use_custom_grep_plugin=false
# path given with -i/--input-file
input_file=nil
# output/logger objects, one per --log-* option
output_list = []
# settings collected from the --log-mongo-* options
mongo={}
mongo[:use_mongo_log]=false

# Table of accepted command-line options. Each row lists the names of one
# option followed by its argument requirement.
# optional arguments work badly with URLs
opts = GetoptLong.new(
      [ '-h', '--help', GetoptLong::NO_ARGUMENT ],
      [ '--short-help', GetoptLong::NO_ARGUMENT ],      
      [ '-v','--verbose', GetoptLong::NO_ARGUMENT ],
      [ '-l','--list-plugins', GetoptLong::NO_ARGUMENT ],
      [ '-p','--plugins', GetoptLong::REQUIRED_ARGUMENT ],
      [ '-I','--info-plugins','--search-plugins', GetoptLong::OPTIONAL_ARGUMENT ],            
      [ '--dorks', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--colour','--color', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--log-object', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--log-brief', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--log-xml', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--log-json', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--log-json-verbose', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--log-magictree', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--log-verbose', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--log-mongo-collection', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--log-mongo-host', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--log-mongo-database', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--log-mongo-username', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--log-mongo-password', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--log-sql', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--log-sql-create', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--log-errors', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--no-errors', GetoptLong::NO_ARGUMENT ],
      [ '-i','--input-file', GetoptLong::REQUIRED_ARGUMENT ],
      [ '-U','--user-agent', GetoptLong::REQUIRED_ARGUMENT ],
      [ '-a','--aggression', GetoptLong::REQUIRED_ARGUMENT ],
      [ '-t','--max-threads', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--follow-redirect', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--max-redirects', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--proxy', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--proxy-user', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--url-prefix', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--url-suffix', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--url-pattern', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--custom-plugin', GetoptLong::REQUIRED_ARGUMENT ],
      [ '-g','--grep', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--open-timeout', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--read-timeout', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--header','-H', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--cookie','-c', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--user','-u', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--wait', GetoptLong::REQUIRED_ARGUMENT ],
      [ '--debug', GetoptLong::NO_ARGUMENT ],
      [ '--version', GetoptLong::NO_ARGUMENT ],
      [ '-q','--quiet', GetoptLong::NO_ARGUMENT]
    )

# Walk over the parsed command-line options and apply each one: informational
# options (--help, --list-plugins, --version, ...) print and exit, the others
# set globals or the local parser state (input_file, plugin_selection,
# output_list, mongo, ...).
#
# The passwords for --proxy-user and --user are taken from the regex capture
# itself. They used to be built with scan(...).to_s, which on Ruby 1.9+
# produces the inspect form (e.g. [["secret"]]) instead of the password.
#
# Any StandardError or GetoptLong::Error raised while parsing is reported
# through error() and ends the program.
# NOTE(review): that path calls a bare `exit`, i.e. it ends with status 0 -
# confirm whether a non-zero status is wanted there.
begin
	opts.each do |opt, arg|
		case opt
			when '-i','--input-file'
				input_file=arg
			when '-l','--list-plugins'
				PluginSupport.load_plugins
				PluginSupport.plugin_list
				exit
			when '-p','--plugins'
				plugin_selection=arg
			when '-I','--info-plugins'
				PluginSupport.load_plugins
				PluginSupport.plugin_info(arg.split(","))
				exit
			when '--dorks'
				PluginSupport.load_plugins
				PluginSupport.plugin_dorks(arg)
				exit

			when '--color','--colour'
				$use_colour="always" unless arg # no argument
				case arg.downcase
					when 'auto'
						$use_colour="auto"
					when 'always'
						$use_colour="always"
					when 'never'
						$use_colour=false
					else
						raise("--colour argument not recognized")
					end
			when '--log-object'
				output_list << OutputObject.new(arg)
			when '--log-brief'
				output_list << OutputBrief.new(arg)
			when '--log-xml'
				output_list << OutputXML.new(arg)
			when '--log-magictree'
				output_list << OutputMagicTreeXML.new(arg)
			when '--log-verbose'
				output_list << OutputVerbose.new(arg)
			when '--log-sql'
				output_list << OutputSQL.new(arg)
			when '--log-sql-create'
				PluginSupport.load_plugins("+")
				# delete the file if it already exists (best effort)
				begin
					File.delete(arg)
				rescue
				end
				OutputSQL.new(arg).create_tables
				puts "SQL CREATE statements written to #{arg}"
				exit
			when '--log-json'
				if defined?(JSON)
					output_list << OutputJSON.new(arg)
				else
					raise("Sorry. The JSON gem is required for JSON output")
				end
			when '--log-json-verbose'
				if defined?(JSON)
					output_list << OutputJSONVerbose.new(arg)
				else
					raise("Sorry. The JSON gem is required for JSONVerbose output")
				end
			when '--log-mongo-collection'
				if defined?(Mongo) and defined?(CharDet)
					mongo[:collection]=arg
					mongo[:use_mongo_log]=true
				else
					raise("Sorry. The mongo and rchardet gems are required for Mongo output")
				end

			when '--log-mongo-host'
				if defined?(Mongo) and defined?(CharDet)
					mongo[:host]=arg
					mongo[:use_mongo_log]=true
				else
					raise("Sorry. The mongo and rchardet gems are required for Mongo output")
				end

			when '--log-mongo-database'
				if defined?(Mongo) and defined?(CharDet)
					mongo[:database]=arg
					mongo[:use_mongo_log]=true
				else
					raise("Sorry. The mongo and rchardet gems are required for Mongo output")
				end
			when '--log-mongo-username'
				if defined?(Mongo) and defined?(CharDet)
					mongo[:username]=arg
					mongo[:use_mongo_log]=true
				else
					raise("Sorry. The mongo and rchardet gems are required for Mongo output")
				end
			when '--log-mongo-password'
				if defined?(Mongo) and defined?(CharDet)
					mongo[:password]=arg
					mongo[:use_mongo_log]=true
				else
					raise("Sorry. The mongo and rchardet gems are required for Mongo output")
				end
			when '--log-errors'
				$OUTPUT_ERRORS = OutputErrors.new(arg)
			when '--no-errors'
				$NO_ERRORS = true
			when '-U','--user-agent'
				$USER_AGENT=arg
			when '-t','--max-threads'
				$MAX_THREADS=arg.to_i
			when '-a','--aggression'
				raise "Agression level must be 1,3, or 4. #{arg} is invalid." unless [1,3,4].include? arg.to_i
				$AGGRESSION=arg.to_i
			when '--proxy'
				$USE_PROXY=true
				$PROXY_HOST = arg.to_s.split(":")[0]
				$PROXY_PORT = arg.to_s.split(":")[1].to_i if arg.to_s.include?(":")
			when '--proxy-user'
				$PROXY_USER=arg.to_s.split(":")[0]
				# everything after the first colon is the password
				proxy_pass = arg.to_s[/^[^:]*:(.+)/, 1]
				$PROXY_PASS = proxy_pass if proxy_pass
			when '-q','--quiet'
				$QUIET=true
			when '--url-prefix'
				$URL_PREFIX=arg
			when '--url-suffix'
				$URL_SUFFIX=arg
			when '--url-pattern'
				$URL_PATTERN=arg
			when '--custom-plugin'
				use_custom_plugin=true if PluginSupport.custom_plugin(arg)
			when '--grep','-g'
				use_custom_grep_plugin=true if PluginSupport.custom_plugin(arg,"grep")
			when '--follow-redirect'
				if ["never","http-only","meta-only","same-site","same-domain","always"].include?(arg.downcase)
					$FOLLOW_REDIRECT=arg.downcase
				else
					raise("Invalid --follow-redirect parameter.")
				end
			when '--max-redirects'
				$MAX_REDIRECTS=arg.to_i
			when '--open-timeout'
				$HTTP_OPEN_TIMEOUT=arg.to_i
			when '--read-timeout'
				$HTTP_READ_TIMEOUT=arg.to_i
			when '--wait'
				$WAIT = arg.to_i
			when '-H','--header'
				begin
					# "Name:value"; an empty value is stored as ""
					x=arg.scan(/([^:]+):(.*)/).flatten
					raise if x.empty?
					$CUSTOM_HEADERS[x.first]=x.last
				rescue
					raise("Invalid --header parameter.")
				end
			when '-c','--cookie'
				begin
					raise if arg.empty?
					$CUSTOM_HEADERS["Cookie"]=arg
				rescue
					raise("Cookie require a parameter, e.g. name=value; name2=value2")
				end
			when '-u','--user'
				$BASIC_AUTH_USER=arg.split(":").first
				# everything after the first colon is the password
				basic_auth_pass = arg.to_s[/^[^:]*:(.+)/, 1]
				$BASIC_AUTH_PASS = basic_auth_pass if basic_auth_pass
			when '--debug'
				$WWDEBUG = true
			when '--short-help'
				usage_short
				exit
			when '-h','--help'
				usage_full
				exit
			when '-v','--verbose'
				$verbose=$verbose+1
			when '--version'
				puts "WhatWeb version #{$VERSION} ( http://www.morningstarsecurity.com/research/whatweb/ )"
				exit
		end
	end
rescue Errno::EPIPE
	exit
rescue StandardError, GetoptLong::Error => err
	# Disable colours in Windows environments for errors in usage
	if RbConfig::CONFIG['host_os'] =~ /mswin|mingw/
		$use_colour = false
	end
	puts
	error err
	exit
end

# sanity check # Disable colours in Windows environments when set to auto
# (any setting other than "always" is switched off on mswin/mingw hosts)
if RbConfig::CONFIG['host_os'] =~ /mswin|mingw/
	$use_colour = false unless $use_colour == "always"
end

### PLUGINS
# When plugins were selected explicitly with -p, add the plugins defined via
# --custom-plugin / --grep to that selection before loading.
plugin_selection += ",+Custom-Plugin" 	if use_custom_plugin and plugin_selection
plugin_selection += ",+Grep" 		if use_custom_grep_plugin and plugin_selection
$plugins_to_use = PluginSupport.load_plugins(plugin_selection)
# load all the plugins

# sanity check # no plugins?
if $plugins_to_use.size == 0
	error "No plugins selected, exiting."
	exit 1
end

# optimise plugins
PluginSupport.precompile_regular_expressions 

### OUTPUT
# Console output depends on verbosity: brief by default, verbose with -v,
# verbose plus object inspection with -vv. --quiet only drops the brief output.
output_list << OutputBrief.new unless $QUIET or $verbose > 0 # by default output brief
output_list << OutputObject.new() if $verbose > 1 # full output if -vv
output_list << OutputVerbose.new() if $verbose > 0 # full output if -v

## output dependencies
# MongoDB logging is only set up when a plugin named "Charset" is among the
# selected plugins; otherwise the program reports the problem and exits.
if mongo[:use_mongo_log]
	if $plugins_to_use.map { |a,b| a }.include?("Charset")
		output_list << OutputMongo.new(mongo) 
	else
		error("MongoDB logging requires the Charset plugin to be activated. The Charset plugin is the slowest whatweb plugin, it not included by default, and resides in the plugins-disabled folder. Use ./whatweb -p +./plugins-disabled/Charset.rb to enable it.")
		exit
	end
end

## Headers
# Use the default User-Agent unless one was given with --header, then drop
# every header whose value is empty (this is how "User-Agent:" removes it).
$CUSTOM_HEADERS["User-Agent"]=$USER_AGENT unless $CUSTOM_HEADERS["User-Agent"]
$CUSTOM_HEADERS.delete_if {|k,v| v=="" }

### TARGETS
# clean up urls, add example urls if needed
# ARGV holds whatever GetoptLong left over, i.e. the targets
$targets=make_target_list(ARGV, input_file, $plugins_to_use)
# targets handed out recently; next_target consults it to skip repeats
$recent_targets=[]

# fail & show usage if no targets.
if $targets.size <1
	error "No targets selected"		
	exit 1
end

# mutex used by error() to serialise its output
$semaphore=Mutex.new
Thread.abort_on_exception = true if $WWDEBUG

# Main scan loop: start one thread per target returned by next_target. Each
# thread opens its target, runs the plugins, writes the results to every
# output in output_list and then follows redirects, up to $MAX_REDIRECTS.
while t = next_target
		Thread.new(t) do |thistarget|
			begin
				target = Target.new(thistarget) # we set the target within the thread
			rescue => err
				error(err)
				# leaves the block, which ends this thread
				next
			end

			puts Thread.current.to_s + " started for " + target.to_s if $verbose>1
			sleep $WAIT unless $WAIT.nil? # wait

			# follow redirects
			# no_redirects doubles as the loop's stop flag: once it is true the
			# current pass is the last one
			no_redirects =false
			num_redirects = 0
			while no_redirects == false do
				# local files are processed once and never redirect
				no_redirects=true if target.is_file?
				# give up once $MAX_REDIRECTS redirects have been followed
				if num_redirects == $MAX_REDIRECTS
					error("ERROR Too many redirects: #{target.to_s}")
					no_redirects=true
					next
				end

				begin
					target.open
				rescue => err
					error("ERROR Opening target: #{target.to_s} - #{err}")
					no_redirects = true # without this we can get stuck in a loop
					raise if $WWDEBUG
					next
				end

				if target.is_url? and target.status.nil?
					# assume all HTTP sites return a status
					no_redirects=true
					next
				end

				results = run_plugins(target)

				# reporting
				# multiple output plugins simultaneously, some stdout, some files
				output_list.each do |o|
					begin
						o.out(target, target.status, results)
					rescue => err
						#srsly, logging failed
						error("ERROR Logging failed: #{target.to_s} - #{err}")
						raise if $WWDEBUG==true
					end
				end

				# REDIRECTION
				# when the target reports a redirection target, continue the loop
				# with a fresh Target for it; otherwise this was the last pass
				unless no_redirects
					begin
						if newtarget = target.get_redirection_target		
							num_redirects+=1 
							target=Target.new(newtarget)
						else
							no_redirects=true 
						end
					rescue => err
						error("ERROR Redirection broken: #{target.to_s} - #{err}")
						no_redirects=true
						raise if $WWDEBUG==true
					end
				end
			end # while no_redirects
		end # Thread.new

	# Throttle: hold back the next target while more than $MAX_THREADS+1
	# threads exist (Thread.list also contains the main thread). This yields
	# with Thread.pass in a loop rather than sleeping.
	while Thread.list.size>($MAX_THREADS+1)
		puts "Thread list full, passing control" if $verbose>1
		#sleep 0.5
		Thread.pass
	end
end # targets.each

# close output logs
output_list.each(&:close)

# shutdown plugins: give every registered plugin the chance to clean up
Plugin.registered_plugins.each { |_name, plugin| plugin.shutdown }

#pp $PLUGIN_TIMES.sort_by {|x,y|y }
