<?php
/**
 * Canonicalises a URL into host / path / query and enumerates the
 * host-suffix and path-prefix combinations (and their raw MD5 digests)
 * that can be looked up for it.
 */
class Normalizer {
    /** @var string Lower-cased, url-encoded host; numeric hosts are rewritten as a dotted quad. */
    public $host = '';
    /** @var string Cleaned path, always starting with "/". */
    public $path = '/';
    /** @var string Query string exactly as returned by parse_url() ('' when absent). */
    public $query = '';

    /**
     * Parses $url and stores its canonical host, path and query.
     *
     * @param string $url URL to normalise.
     */
    public function __construct($url){
        $parts = parse_url($url);
        if(!is_array($parts)) {
            // parse_url() returns false for seriously malformed URLs.
            $parts = array();
        }

        $this->host = $this->clean_host(isset($parts['host']) ? $parts['host'] : '');
        if($this->is_ip($this->host)){
            $this->host = $this->normalize_ip($this->host);
        }

        $this->path  = isset($parts['path']) ? $this->clean_path($parts['path']) : '/';
        $this->query = isset($parts['query']) ? $parts['query'] : '';
    }

    /**
     * Legacy PHP 4-style constructor name, kept so existing explicit calls
     * (e.g. parent::Normalizer($url)) keep working. PHP 8 no longer treats
     * this method as a constructor, hence __construct() above.
     *
     * @param string $url URL to normalise.
     */
    public function Normalizer($url){
        $this->__construct($url);
    }

    /**
     * Collapses runs of dots, strips a leading/trailing dot, lower-cases the
     * host and passes it through urlencode().
     *
     * @param string $host Raw host component.
     * @return string
     */
    public function clean_host($host){
        $host = preg_replace('`\.+`', '.', $host);
        // After collapsing, at most one dot can remain at either end.
        $host = trim($host, '.');
        return urlencode(strtolower($host));
    }

    /**
     * Canonicalises a path: repeatedly url-decodes it, removes "/./",
     * collapses repeated slashes, resolves ".." segments and url-encodes
     * every remaining segment.
     *
     * @param string $path Raw path component.
     * @return string Path that always starts with "/".
     */
    public function clean_path($path){
        // Decode until no %XX escape is left. The class must be exactly the
        // hex digits: anything wider matches text urldecode() leaves
        // untouched and the loop would never terminate.
        while(preg_match('`%[0-9a-fA-F]{2}`', $path)) {
            $path = urldecode($path);
        }

        // A single str_replace() pass can leave "/./" behind (e.g. "/././").
        do {
            $before = $path;
            $path = str_replace('/./', '/', $path);
        } while($before !== $path);

        $path = preg_replace('`/+`', '/', $path);
        if(substr($path, 0, 1) !== '/') {
            $path = '/' . $path;
        }

        // $segs[0] is always the empty segment in front of the root slash.
        $segs = array();
        $ended_on_parent = false;
        foreach(explode('/', $path) as $part) {
            if($part === '..') {
                // Drop the previous segment, but never climb above the root.
                if(count($segs) > 1) {
                    array_pop($segs);
                }
                $ended_on_parent = true;
                continue;
            }
            $ended_on_parent = false;
            $segs[] = urlencode($part);
        }
        if($ended_on_parent) {
            // "/a/b/.." names the directory "/a/", so keep the trailing slash.
            $segs[] = '';
        }
        return implode('/', $segs);
    }

    /**
     * Tells whether $host consists of at most four dot-separated parts that
     * are each a run of digits or a "0x"-prefixed hexadecimal number.
     *
     * @param string $host
     * @return bool
     */
    public function is_ip($host){
        $parts = explode('.', $host);
        // too many parts to the IP
        if(count($parts) > 4) {
            return false;
        }
        // all parts are valid numbers
        foreach($parts as $segment) {
            if(!preg_match('`^(?:\d+|0x[0-9a-fA-F]+)$`', $segment)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Converts a numeric host (decimal, leading-zero octal or "0x" hex parts,
     * one to four of them) into dotted-quad notation.
     *
     * Every part except the last supplies one octet; the last part fills all
     * remaining octets, so "127.1" becomes "127.0.0.1" and a single number
     * covers the whole address.
     *
     * @param string $host Host for which is_ip() returned true.
     * @return string
     */
    public function normalize_ip($host){
        $parts  = explode('.', $host);
        $count  = count($parts);
        $octets = array(0, 0, 0, 0);

        foreach($parts as $index => $segment) {
            // Floats keep values above 2^31 intact on 32-bit builds.
            if(substr($segment, 0, 2) === '0x') { // hex
                $value = (float)hexdec(substr($segment, 2));
            } else if(substr($segment, 0, 1) === '0') { // octal
                $value = (float)octdec($segment);
            } else { // decimal
                $value = (float)$segment;
            }

            if($index < $count - 1) {
                // Leading parts contribute their low 8 bits only.
                $octets[$index] = (int)fmod($value, 256);
            } else {
                // Last part: spread it over the remaining octets, lowest
                // byte first; excess high bits are discarded.
                for($k = 3; $k >= $index; $k--) {
                    $octets[$k] = (int)fmod($value, 256);
                    $value = floor($value / 256);
                }
            }
        }
        return implode('.', $octets);
    }

    /**
     * Lists the path candidates: the path itself, the path with its query
     * string (when there is one), then "/" and up to three further directory
     * prefixes, each ending in "/".
     *
     * @return array List of strings.
     */
    public function path_possibilities() {
        $pos = array($this->path);
        // Compare against '' so that a query of "0" is not dropped.
        if($this->query !== ''){
            $pos[] = $this->path . '?' . $this->query;
        }

        $parts  = explode('/', $this->path);
        // The final component is the exact path already listed above.
        $limit  = min(4, count($parts) - 1);
        $prefix = '';
        for($i = 0; $i < $limit; $i++) {
            $prefix = ($i === 0) ? '/' : $prefix . $parts[$i] . '/';
            $pos[] = $prefix;
        }
        return $pos;
    }

    /**
     * Lists the host candidates: the exact host followed by up to four
     * shorter suffixes, starting with the last two labels and growing to the
     * left. A host recognised by is_ip() yields only itself, because
     * suffixes of an IP address are not hostnames.
     *
     * @return array List of strings.
     */
    public function host_possibilities() {
        $pos = array($this->host);
        if($this->is_ip($this->host)) {
            return $pos;
        }

        $labels = explode('.', $this->host);
        $suffix = array_pop($labels);
        // count($labels) > 1 stops before the full host is rebuilt.
        for($added = 0; $added < 4 && count($labels) > 1; $added++) {
            $suffix = array_pop($labels) . '.' . $suffix;
            $pos[] = $suffix;
        }
        return $pos;
    }

    /**
     * Returns every host candidate concatenated with every path candidate,
     * hosts in the outer loop.
     *
     * @return array List of strings.
     */
    public function link_possibilities() {
        $paths = $this->path_possibilities();
        $pos = array();
        foreach($this->host_possibilities() as $host){
            foreach($paths as $path){
                $pos[] = $host . $path;
            }
        }
        return $pos;
    }

    /**
     * Returns the raw (16-byte binary) MD5 digest of every entry of
     * link_possibilities(), in the same order.
     *
     * @return array List of binary strings.
     */
    public function hash_possibilities() {
        $pos = array();
        foreach($this->link_possibilities() as $link){
            $pos[] = md5($link, true);
        }
        return $pos;
    }
}
/**
 * Looks a link up in the `hashes` table and records whether any of its
 * candidate hashes is listed as malware or as a scam.
 */
class ScamTest {
    /** @var bool True when a matching row has type 'goog-black-hash'. */
    public $scam = false;
    /** @var bool True when a matching row has type 'goog-malware-hash'. */
    public $malware = false;
    /** @var object Database handle passed to the constructor. */
    public $database;

    /**
     * Normalises $link, queries the `hashes` table for every candidate hash
     * and sets the scam / malware flags from the `type` of each row found.
     *
     * @param string $link     URL to test.
     * @param object $database Handle that must provide excape(), sql() and get_row().
     */
    public function __construct($link, $database) {
        $this->scam = false;
        $this->malware = false;
        $this->database = $database;

        $url = new Normalizer($link);
        $link_hashes = array();
        foreach($url->hash_possibilities() as $hash) {
            // NOTE(review): "excape" looks like a typo of "escape" — confirm
            // against the database class before renaming. It is presumably
            // what makes the raw binary hash safe to embed in the query.
            $link_hashes[] = $database->excape($hash);
        }

        $hashes = implode("','", $link_hashes);
        $database->sql("SELECT `type` FROM `hashes` WHERE `hash` IN ('$hashes')");
        while($row = $database->get_row()){
            if($row['type'] == 'goog-malware-hash') {
                $this->malware = true;
            } else if($row['type'] == 'goog-black-hash') {
                $this->scam = true;
            }
        }
    }

    /**
     * Legacy PHP 4-style constructor name, kept so existing explicit calls
     * keep working. PHP 8 no longer treats this method as a constructor,
     * hence __construct() above.
     *
     * @param string $link     URL to test.
     * @param object $database Database handle.
     */
    public function ScamTest($link, $database) {
        $this->__construct($link, $database);
    }

    /**
     * Converts a hexadecimal string into the bytes it encodes, two hex
     * digits per byte. An empty string yields an empty string; a trailing
     * single digit is converted on its own.
     *
     * @param string $str Hexadecimal text.
     * @return string Binary string.
     */
    public function hex2bin($str) {
        $bin = '';
        $length = strlen($str);
        // substr() replaces the $str{$i} offset syntax removed in PHP 8.
        for($i = 0; $i < $length; $i += 2) {
            $bin .= chr(hexdec(substr($str, $i, 2)));
        }
        return $bin;
    }

    /**
     * @return bool Whether a 'goog-black-hash' row matched the link.
     */
    public function is_scam() {
        return $this->scam;
    }

    /**
     * @return bool Whether a 'goog-malware-hash' row matched the link.
     */
    public function is_malware() {
        return $this->malware;
    }
}
?>