Ti posto una funzione che fa una cosa vagamente simile, presa da un programmino che avevo scritto una vita fa. C'è un sacco di roba inutile: lascio a te il compito di filtrare il tutto.

codice:
/**
 * Downloads a page over plain HTTP and returns its body.
 *
 * Opens a TCP connection to the host named in $target_page, sends an
 * HTTP/1.0 GET request, reads the response headers and then the body.
 * The body is read until Content-Length bytes have arrived (when the server
 * announced a length and did not use chunked encoding) or until the server
 * closes the connection. A chunked body is decoded before being returned.
 *
 * On a connection failure the function prints an error message and
 * terminates the script with exit(), as callers of the original version
 * expect.
 *
 * NOTE(review): with $showheaders enabled the request and the raw response
 * headers are printed unescaped; if the output is an HTML page and the URL
 * or the remote server is untrusted, this should be escaped by the caller
 * or disabled.
 *
 * @param string $target_page URL to fetch, with or without a leading
 *                            "http://" (e.g. "www.example.com/index.html").
 *                            A "host:port" authority is honoured; a URL
 *                            without a path requests "/".
 * @param bool   $showheaders When true (the default), prints the request
 *                            that is sent and every response header line.
 *
 * @return string|null The response body when the status code is 200,
 *                     null for any other status or when no response
 *                     could be read.
 */
function get_page($target_page, $showheaders=true)
{
    // Drop CR/LF so a crafted URL cannot inject extra request headers.
    $target_page = str_replace(array("\r", "\n"), "", $target_page);

    // Strip the scheme only when it is a prefix, in any letter case, so that
    // an "http://" occurring later (e.g. in a query string) is left intact.
    if (strncasecmp($target_page, "http://", 7) == 0) {
        $target_page = (string) substr($target_page, 7);
    }

    // Split "host[:port]/path" into the connection target and the request path.
    $first_slash_pos = strpos($target_page, "/");
    if ($first_slash_pos === false) {
        $site = $target_page;
        $page = "/";
    } else {
        $site = substr($target_page, 0, $first_slash_pos);
        $page = substr($target_page, $first_slash_pos);
    }

    $port = 80;
    if (preg_match('/^(.+):(\d{1,5})$/', $site, $authority_parts)
        && (int) $authority_parts[2] >= 1
        && (int) $authority_parts[2] <= 65535) {
        $site = $authority_parts[1];
        $port = (int) $authority_parts[2];
    }

    // fsockopen() declares $errno/$errstr by reference itself; passing them
    // with "&" at the call site is a fatal error on PHP 5.4 and later.
    $errno = 0;
    $errstr = "";
    $remote = @fsockopen($site, $port, $errno, $errstr, 15);

    if (!$remote) {
        print("[b]Error opening connection: $errstr($errno)[b]\n");
        exit();
    }

    // The fsockopen() timeout only covers connecting; bound the reads too.
    stream_set_timeout($remote, 15);

    // Every header line ends with CRLF and a single empty line ends the request.
    $pagerequest = "GET $page HTTP/1.0\r\n" .
                   "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0b; Windows NT 5.0)\r\n" .
                   "Host: $site:$port\r\n" .
                   "Accept: */*\r\n" .
                   "\r\n";

    if ($showheaders) {
        print("Page request:\n\n\n");
        print(nl2br($pagerequest));
        print("\n\n\n");
    }

    fwrite($remote, $pagerequest);

    // ---- Response headers -------------------------------------------------

    $linecount = 0;
    $ok_response = false;
    $chunked = false;
    $content_length = null; // null means the server announced no length

    while (($line = fgets($remote, 2000)) !== false) {
        $linecount++;

        if ($showheaders) {
            print("[H $linecount ] : $line\n\n");
        }

        $header = rtrim($line, "\r\n");

        // An empty line marks the end of the headers.
        if ($header === "") {
            break;
        }

        if ($linecount == 1) {
            // Only the status code matters; the reason phrase is free text.
            $ok_response = (preg_match('/^HTTP\/\d+\.\d+\s+200\b/i', $header) == 1);
            continue;
        }

        // Header names are case-insensitive.
        if (preg_match('/^Content-Length:\s*(\d+)/i', $header, $length_match)) {
            $content_length = (int) $length_match[1];
        } elseif (preg_match('/^Transfer-Encoding:.*\bchunked\b/i', $header)) {
            // Looked for in this header only, never in the page content.
            $chunked = true;
        }
    }

    if (!$ok_response) {
        fclose($remote);
        return null;
    }

    // ---- Response body ----------------------------------------------------

    $return_buffer = "";

    while (!feof($remote)) {
        $wanted = 8192;

        if ($content_length !== null && !$chunked) {
            $wanted = $content_length - strlen($return_buffer);
            if ($wanted <= 0) {
                break;
            }
            if ($wanted > 8192) {
                $wanted = 8192;
            }
        }

        $data = fread($remote, $wanted);

        // false or an empty read means error, timeout or end of stream:
        // stop instead of spinning on a connection that never reaches EOF.
        if ($data === false || $data === "") {
            break;
        }

        $return_buffer .= $data;
    }

    fclose($remote);

    if ($chunked) {
        $return_buffer = get_page_decode_chunked($return_buffer);
    }

    return $return_buffer;
}

/**
 * Helper for get_page(): removes HTTP "chunked" transfer framing from a body.
 *
 * Each chunk is a hexadecimal size line (optionally followed by
 * ";extension"), the chunk data and a line break; a chunk of size 0 ends
 * the body. Trailer headers after the last chunk are ignored.
 *
 * @param string $raw The body exactly as received from the server.
 *
 * @return string The concatenated chunk data. If the very first size line is
 *                not valid hexadecimal, $raw is returned unchanged; if a
 *                later one is invalid or the data is truncated, whatever was
 *                decoded up to that point is returned.
 */
function get_page_decode_chunked($raw)
{
    $decoded = "";
    $offset = 0;
    $total = strlen($raw);

    while ($offset < $total) {
        $line_end = strpos($raw, "\n", $offset);
        if ($line_end === false) {
            break;
        }

        $size_line = substr($raw, $offset, $line_end - $offset);

        // Drop optional chunk extensions ("1a;name=value").
        $semicolon_pos = strpos($size_line, ";");
        if ($semicolon_pos !== false) {
            $size_line = substr($size_line, 0, $semicolon_pos);
        }
        $size_line = trim($size_line);

        if (!preg_match('/^[0-9a-fA-F]+$/', $size_line)) {
            // Not chunk framing: hand back the input if nothing was decoded.
            return ($decoded === "") ? $raw : $decoded;
        }

        $chunk_size = (int) hexdec($size_line);
        if ($chunk_size <= 0) {
            break; // last chunk
        }

        $offset = $line_end + 1;
        $decoded .= (string) substr($raw, $offset, $chunk_size);
        $offset += $chunk_size;

        // Skip the line break that terminates the chunk data.
        if (substr($raw, $offset, 2) === "\r\n") {
            $offset += 2;
        } elseif (substr($raw, $offset, 1) === "\n") {
            $offset += 1;
        }
    }

    return $decoded;
}