codice:
// ************************************************************
// * function get_page($target_page, $showheaders = true)     *
// ************************************************************
// * Downloads the requested URL over plain HTTP (port 80)    *
// * and returns the response body.                           *
// ************************************************************
//
// Parameters:
//   $target_page  URL to fetch, with or without a leading "http://"
//                 (for example "http://example.com/index.html").
//                 A URL that has no path component is requested as "/".
//   $showheaders  When true (the default), prints the outgoing request
//                 and every received header line.
//
// Returns:
//   The response body, exactly as received, when the status line
//   reports 200; an empty string otherwise. A body sent with chunked
//   transfer encoding is de-chunked before it is returned.
//
// Side effects:
//   If the connection cannot be opened, prints an error message and
//   terminates the script with exit().
//
// NOTE(review): the progress/error messages contain bare newlines and
// "[b]" markers where HTML markup may have been lost when this code was
// pasted; they are kept byte-for-byte here - confirm the intended text.
function get_page($target_page, $showheaders=true)
{
    // Flip to true to trace the whole exchange on the output stream.
    $debug = false;

    // Drop only a leading scheme (any letter case). An "http://" that
    // appears later, e.g. inside a query string, must stay untouched.
    $target_page = preg_replace('#^http://#i', '', $target_page);

    // Split "host/path" at the first slash; a bare host means "/".
    $first_slash_pos = strpos($target_page, '/');
    if ($first_slash_pos === false) {
        $site = $target_page;
        $page = '/';
    } else {
        $site = substr($target_page, 0, $first_slash_pos);
        $page = substr($target_page, $first_slash_pos);
    }

    // Open the connection. fsockopen() declares its error arguments
    // by reference itself, so no "&" is written at the call site.
    // The warning is suppressed because the failure is reported below.
    $errno = 0;
    $errstr = '';
    $remote = @fsockopen($site, 80, $errno, $errstr, 15);
    if (!$remote) {
        print("[b]Error opening connection: $errstr($errno)[b]\n");
        exit();
    }
    if ($debug) {
        print("Successfully opened connection to site $site\n\n");
        flush();
    }

    // Every request line ends in CRLF and a single empty line closes
    // the header section.
    $pagerequest = "GET $page HTTP/1.0\r\n" .
        "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0b; Windows NT 5.0)\r\n" .
        "Host: $site:80\r\n" .
        "Accept: */*\r\n" .
        "\r\n";

    if ($showheaders) {
        print("Page request:\n\n");
        print(nl2br($pagerequest));
        print("\n\n");
    }

    fwrite($remote, $pagerequest);
    if ($debug) {
        print("Waiting for request data\n\n\n");
        flush();
    }

    // Reader state, initialised explicitly so that nothing is read
    // before it has been assigned.
    $getting_headers = true;
    $finished_receiving = false;
    $ok_response = false;
    $chunked = false;
    $content_length = 0;
    $received_content_size = 0;
    $linecount = 0;
    $return_buffer = '';

    while (!$finished_receiving) {
        $line = fgets($remote, 2000);
        if ($line === false) {
            // End of stream, timeout or read error: nothing more to get.
            break;
        }
        $linecount++;

        if ($getting_headers) {
            if ($showheaders) {
                print("[H $linecount ] : $line\n\n");
            }
            $header = rtrim($line, "\r\n");

            // Only the status code on the first line matters; the
            // reason phrase after it is free text.
            if ($linecount == 1 && preg_match('#^HTTP/\d+\.\d+\s+200(\s|$)#i', $header)) {
                $ok_response = true;
                if ($debug) {
                    print("OK response received\n");
                }
            }

            // Header names are case-insensitive.
            if (preg_match('/^Content-Length:\s*(\d+)/i', $header, $match)) {
                $content_length = (int) $match[1];
                if ($debug) {
                    print("Found content length: $content_length\n\n");
                }
            }
            // Look for "chunked" in the Transfer-Encoding header only, so
            // that a body which merely contains the word is not affected.
            if (preg_match('/^Transfer-Encoding:.*\bchunked\b/i', $header)) {
                $chunked = true;
                if ($debug) {
                    print("Found chunked encoding.\n");
                }
            }

            // An empty line ends the headers; body data follows.
            if ($header === '') {
                $getting_headers = false;
                if ($debug) {
                    print("Finished with the headers\n");
                }
            }
        } else {
            if ($debug) {
                $tmp_newline = str_replace(array('<', '>'), array('&lt;', '&gt;'), $line);
                print("[B $linecount ] : $tmp_newline\n\n");
            }

            $received_content_size += strlen($line);
            if ($debug) {
                print("Received content size: $received_content_size\n\n");
            }

            // fgets() keeps the line terminator, so the data is appended
            // as-is; adding another newline would corrupt the body.
            if ($ok_response) {
                $return_buffer .= $line;
            }

            // Content-Length counts encoded bytes only when the body is
            // not chunked; a chunked body is read until the stream ends.
            if (!$chunked && $content_length != 0 && $received_content_size >= $content_length) {
                $finished_receiving = true;
            }
            if (feof($remote)) {
                $finished_receiving = true;
            }
        }

        if ($debug) {
            flush();
        }
    } // End of while loop that reads the remote server response

    fclose($remote);

    if ($chunked) {
        $return_buffer = _get_page_decode_chunked($return_buffer);
    }
    return $return_buffer;
}

// Private helper of get_page(): removes chunked transfer-encoding framing
// (hexadecimal size line, chunk data, CRLF, ... terminated by a zero-size
// chunk) from $raw and returns the concatenated chunk data. Trailer
// headers after the last chunk are ignored. If a size line is not valid
// hexadecimal the input is returned unchanged; if the data simply stops
// early, whatever was decoded up to that point is returned.
function _get_page_decode_chunked($raw)
{
    $decoded = '';
    $offset = 0;
    $total = strlen($raw);

    while ($offset < $total) {
        $line_end = strpos($raw, "\n", $offset);
        if ($line_end === false) {
            break; // data ends in the middle of a size line
        }

        // The size may be followed by ";extension" parameters.
        $size_field = substr($raw, $offset, $line_end - $offset);
        $semicolon = strpos($size_field, ';');
        if ($semicolon !== false) {
            $size_field = substr($size_field, 0, $semicolon);
        }
        $size_field = trim($size_field);
        if (!preg_match('/^[0-9a-fA-F]+$/', $size_field)) {
            return $raw; // not chunk framing after all
        }

        $chunk_size = (int) hexdec($size_field);
        if ($chunk_size === 0) {
            break; // last-chunk marker
        }

        $decoded .= substr($raw, $line_end + 1, $chunk_size);
        $offset = $line_end + 1 + $chunk_size;

        // Skip the line terminator that follows the chunk data.
        if (substr($raw, $offset, 2) === "\r\n") {
            $offset += 2;
        } elseif (substr($raw, $offset, 1) === "\n") {
            $offset += 1;
        }
    }

    return $decoded;
}