Attack surface analysis script
Published by Dionata Suzin (last updated 11/10/2024)
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

# Set the target domain directly in the script
domain = "exemplo.com.br"
# Function to fetch all exposed URLs via crawling
def full_web_crawl(base_url):
    visited_urls = set()
    pending_urls = set([base_url])
    while pending_urls:
        url = pending_urls.pop()
        if url not in visited_urls:
            visited_urls.add(url)
            try:
                response = requests.get(url, timeout=10)
                if response.status_code == 200:
                    soup = BeautifulSoup(response.text, 'html.parser')
                    # Collect all URLs on the current page
                    for link in soup.find_all('a', href=True):
                        absolute_url = urljoin(base_url, link['href'])
                        parsed_url = urlparse(absolute_url)
                        # Restrict to the base domain and ignore anchors
                        if parsed_url.netloc == urlparse(base_url).netloc:
                            clean_url = absolute_url.split('#')[0]
                            if clean_url not in visited_urls:
                                pending_urls.add(clean_url)
            except requests.RequestException as e:
                print(f"Error accessing {url}: {e}")
    return visited_urls
# Function to find exposed subdomains via crt.sh
def find_subdomains(domain):
    subdomains = set()
    try:
        crtsh_url = f"https://crt.sh/?q=%25.{domain}&output=json"
        response = requests.get(crtsh_url, timeout=30)
        if response.status_code == 200:
            data = response.json()
            for entry in data:
                name_value = entry.get("name_value")
                if name_value:
                    subdomains.update(name_value.split("\n"))
    except requests.RequestException as e:
        print(f"Error fetching subdomains for {domain}: {e}")
    return subdomains
# Function to find exposed URLs via Google dorks
def google_dork_search(domain):
    dorks = [
        f"site:{domain} intitle:index of",
        f"site:{domain} ext:php",
        f"site:{domain} ext:sql",
        f"site:{domain} ext:log",
        f"site:{domain} inurl:admin",
        f"site:{domain} inurl:login",
        f"site:{domain} inurl:backup",
        f"site:{domain} ext:bak",
        f"site:{domain} ext:old",
        f"site:{domain} ext:swp",
        f"site:{domain} inurl:config",
        f"site:{domain} ext:xml",
        f"site:{domain} ext:json",
        f"site:{domain} inurl:dbadmin",
        f"site:{domain} inurl:dev",
        f"site:{domain} inurl:test",
        f"site:{domain} inurl:staging"
    ]
    found_urls = []
    for dork in dorks:
        try:
            print(f"Searching for: {dork}")
            url = f"https://www.google.com/search?q={dork}"
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            for link in soup.find_all('a'):
                href = link.get('href')
                if href and "url?q=" in href:
                    url_found = href.split("url?q=")[1].split("&")[0]
                    found_urls.append(url_found)
        except requests.exceptions.RequestException as e:
            print(f"Error while searching for {dork}: {e}")
    return found_urls
# Function to fetch archived URLs using the Wayback Machine API
def wayback_machine_search(domain):
    url = f"https://web.archive.org/cdx/search/cdx?url=*.{domain}/*&output=json&fl=original&collapse=urlkey"
    try:
        print(f"Fetching archived URLs from the Wayback Machine for {domain}")
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        # The first row of the CDX JSON output is the header, so skip it
        urls = [entry[0] for entry in response.json()[1:]]
        return urls
    except requests.exceptions.RequestException as e:
        print(f"Error accessing the Wayback Machine: {e}")
        return []
# Function to perform a basic web crawl for sensitive URLs, including non-indexed ones
def web_crawl(domain):
    sensitive_paths = [
        'admin', 'login', 'backup', 'config', 'private', 'phpmyadmin',
        'webmail', 'wp-admin', 'dashboard', 'dbadmin', 'test', 'staging',
        'dev', 'sql', 'server-status', 'cgi-bin', 'shell', 'cmd',
        'config.php', 'passwd', 'password', 'database', 'db', 'logs',
        'temp', 'wp-login.php', 'wp-content', 'uploads', 'downloads',
        'sensitive-data', 'customer-data', 'user-data', 'backup.zip',
        'backup.tar', 'admin.php', 'access', 'htaccess', 'backup.sql',
        'api', 'debug', 'adminer', 'adminpanel',
        'v1', 'v2', 'old', 'outdated', 'backup_old', 'src', 'source',
        'error', 'log', 'hidden', 'local', 'files', 'data'
    ]
    found_paths = []
    base_url = f"https://{domain}/"
    for path in sensitive_paths:
        url = f"https://{domain}/{path}"
        try:
            response = requests.get(url, allow_redirects=True, timeout=10)
            if response.status_code == 200:
                # Check that the URL was not simply redirected to the home page
                if domain in response.url and response.url.rstrip('/') != base_url.rstrip('/'):
                    found_paths.append(url)
            else:
                print(f"URL {url} returned status {response.status_code}")
        except requests.exceptions.RequestException as e:
            print(f"Error accessing {url}: {e}")
    return found_paths
# Function to generate an HTML report with the URLs found
def generate_html_report(domain, crawled_urls, subdomains, google_urls, wayback_urls, sensitive_urls):
    html_content = f"""
    <html>
    <head>
        <title>Exposed URLs Report - {domain}</title>
        <style>
            body {{ font-family: Arial, sans-serif; background-color: #f4f4f4; padding: 20px; }}
            table {{ width: 100%; border-collapse: collapse; margin-bottom: 20px; }}
            th, td {{ border: 1px solid #dddddd; padding: 8px; text-align: left; }}
            th {{ background-color: #1E90FF; color: white; }}
            td {{ background-color: #f2f2f2; }}
            h1 {{ color: #1E90FF; }}
            h2 {{ color: #4682B4; }}
            tr:nth-child(even) {{ background-color: #E0FFFF; }}
        </style>
    </head>
    <body>
        <h1>Exposed URLs Report for {domain}</h1>
        <h2>URLs Found via Web Crawling:</h2>
        <table>
            <tr><th>URL</th></tr>
    """
    if crawled_urls:
        for url in crawled_urls:
            html_content += f"<tr><td>{url}</td></tr>"
    else:
        html_content += "<tr><td>No URLs found via crawling.</td></tr>"

    html_content += """
        </table>
        <h2>Subdomains Found:</h2>
        <table>
            <tr><th>Subdomain</th></tr>
    """
    if subdomains:
        for subdomain in subdomains:
            html_content += f"<tr><td>{subdomain}</td></tr>"
    else:
        html_content += "<tr><td>No subdomains found.</td></tr>"

    html_content += """
        </table>
        <h2>Google Dorks:</h2>
        <table>
            <tr><th>URL</th></tr>
    """
    if google_urls:
        for url in google_urls:
            html_content += f"<tr><td>{url}</td></tr>"
    else:
        html_content += "<tr><td>No URLs found via Google dorks.</td></tr>"

    html_content += """
        </table>
        <h2>Wayback Machine URLs:</h2>
        <table>
            <tr><th>URL</th></tr>
    """
    if wayback_urls:
        for url in wayback_urls:
            html_content += f"<tr><td>{url}</td></tr>"
    else:
        html_content += "<tr><td>No URLs found via the Wayback Machine.</td></tr>"

    html_content += """
        </table>
        <h2>Sensitive URLs Found:</h2>
        <table>
            <tr><th>URL</th></tr>
    """
    if sensitive_urls:
        for url in sensitive_urls:
            html_content += f"<tr><td>{url}</td></tr>"
    else:
        html_content += "<tr><td>No sensitive URLs found.</td></tr>"

    html_content += """
        </table>
    </body>
    </html>
    """
    # Write the report with an explicit encoding so accented characters are preserved
    with open(f"relatorio_{domain}.html", "w", encoding="utf-8") as report_file:
        report_file.write(html_content)
    print(f"HTML report generated: relatorio_{domain}.html")
if __name__ == "__main__":
    # Discover exposed URLs and subdomains
    crawled_urls = full_web_crawl(f"https://{domain}")
    subdomains = find_subdomains(domain)
    google_urls = google_dork_search(domain)
    wayback_urls = wayback_machine_search(domain)
    sensitive_urls = web_crawl(domain)

    # Generate the report
    generate_html_report(domain, crawled_urls, subdomains, google_urls, wayback_urls, sensitive_urls)
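The script only depends on the requests and beautifulsoup4 packages (pip install requests beautifulsoup4); everything else comes from the standard library. As written, the target is hard-coded in the domain variable at the top of the script. Below is a minimal sketch of an alternative, assuming you would rather pass the target on the command line (the argument name used here is just an illustration):

import argparse

# Hypothetical replacement for the hard-coded assignment at the top of the script:
# read the target domain from the command line instead.
parser = argparse.ArgumentParser(description="Attack surface analysis")
parser.add_argument("domain", help="Target domain, e.g. exemplo.com.br")
domain = parser.parse_args().domain

With that in place of the domain = "exemplo.com.br" line, the rest of the collection and reporting flow runs unchanged for whatever domain is passed in.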