#!/usr/bin/perl
# Proxy server to dump http traffic.
# Copyright 2002-2007 Patrick Killelea under the GPL.
# You may freely copy and redistribute this file.
# Version: Mon Mar 10 16:40:28 PDT 2008
# Please write p@patrick.net with improvements.
# Documentation below.
#
# Overview: the parent listens on the proxy port and forks one child per
# browser connection. Each child reads one HTTP request from the browser,
# dumps it to stdout, rewrites the proxy-style request line, forwards the
# request to the origin server, reads the reply, dumps it to stdout and
# relays it to the browser, then exits. Diagnostics go to stderr.

use strict;
use warnings;
use bytes;
use Fcntl;
#use Posix qw(:sys_wait_h); # Do I really need this line? It makes sprocket break on Solaris.
use Socket;
use Time::HiRes 'time', 'sleep';

# default arguments:
my $proxy_port = 8080;
my $proxy_ip   = INADDR_ANY;    # packed 0.0.0.0, i.e. listen on every interface

# When true, Content-Length progress is also logged while reading server
# replies. Nothing sets it; edit here to enable (this is the original
# script's never-assigned $d switch).
my $debug = 0;

# Diagnostic text for each message-framing check, indexed by completeness
# (0 = still incomplete, 1 = complete).
my %framing_note = (
    chunked => [
        'Have chunked encoding, still need more...',
        'Have chunked encoding, reached the end!',
    ],
    length => [
        'Have content length, need more bytes...',
        'Have content length, got that many bytes!',
    ],
);

# process command line arguments, override defaults:
# Both the attached form (-p8081, -i10.0.0.1) and the separated form
# (-p 8081, -i 10.0.0.1) are accepted. Unknown arguments are ignored.
while (defined(my $arg = shift @ARGV)) {
    last if $arg eq '--';
    if ($arg =~ /^-p(.*)$/s) {
        my $value = length $1 ? $1 : shift @ARGV;
        die "$0: -p requires a numeric port\n"
            unless defined $value and $value =~ /^\d+$/;
        $proxy_port = $value;
    }
    elsif ($arg =~ /^-i(.*)$/s) {
        my $value = length $1 ? $1 : shift @ARGV;
        die "$0: -i requires a dotted IP address\n"
            unless defined $value and $value =~ /^[\d.]+$/;
        my $packed = inet_aton($value);
        die "$0: cannot parse address '$value'\n" unless defined $packed;
        $proxy_ip = $packed;
    }
}

# True when $! reports a condition that simply means "try again later" on a
# non-blocking handle. %! is the core errno hash described in perlvar.
sub is_transient_error {
    return $!{EAGAIN} || $!{EWOULDBLOCK} || $!{EINTR};
}

# Switch $fh to non-blocking mode. $name is only used in the error text.
# fcntl() hands the current flags back as its return value ("0 but true"
# for zero), so they must be taken from there rather than from the third
# argument.
sub set_nonblocking {
    my ($fh, $name) = @_;
    my $flags = fcntl($fh, F_GETFL, 0)
        or die "Couldn't get flags for $name: $!\n";
    fcntl($fh, F_SETFL, $flags | O_NONBLOCK)
        or die "Couldn't set flags for $name: $!\n";
    return;
}

# Inspect the message held in $$message_ref and return one [kind, complete]
# pair per framing header found in its header section: 'chunked' for
# Transfer-Encoding: chunked and 'length' for Content-Length, in that
# order. Lengths are compared numerically instead of with a .{N} regex,
# because regex quantifiers cannot express large body sizes.
sub framing_checks {
    my ($message_ref) = @_;
    my $header_end = index($$message_ref, "\r\n\r\n");
    my $body_start = $header_end + 4;
    # Until the blank line arrives, everything received so far is header.
    my $headers = $header_end >= 0
        ? substr($$message_ref, 0, $body_start)
        : $$message_ref;

    my @checks;
    if ($headers =~ /Transfer-Encoding: chunked/i) {
        # For chunked encoding, we're not done reading unless we have
        # \n0\r\n at the end.
        my $complete = $$message_ref =~ /\r\n0\r\n\r\n\Z/m ? 1 : 0;
        push @checks, [ 'chunked', $complete ];
    }
    if ($headers =~ /Content-Length: (\d+)/i) {
        # For transmissions with a Content-Length, we're not done reading
        # until we have that many body bytes after the header section.
        my $content_length = $1;
        my $complete = ($header_end >= 0
            and length($$message_ref) - $body_start >= $content_length) ? 1 : 0;
        push @checks, [ 'length', $complete ];
    }
    return @checks;
}

# Emit the diagnostic for one framing check. Content-Length progress on the
# SERVER side is logged only in debug mode and is stamped with the time
# instead of the client port, as it always was.
sub log_framing {
    my ($client_port, $peer, $kind, $complete) = @_;
    my $note = $framing_note{$kind}[$complete];
    if ($peer eq 'SERVER' and $kind eq 'length') {
        print STDERR time() . " $note\n" if $debug;
    }
    else {
        print STDERR "$client_port $note\n";
    }
    return;
}

# Read one HTTP message from the non-blocking handle $fh.
# $peer is 'BROWSER' or 'SERVER' and selects the diagnostics.
# Returns ($message, $closed): $closed is true when the peer closed the
# connection (or a hard read error occurred) before the message was judged
# complete. A message is judged complete when data has arrived, the socket
# has run dry, and no framing header says more is expected.
sub read_message {
    my ($fh, $client_port, $peer) = @_;
    my $blksize   = (stat $fh)[11] || 16384;
    my $message   = '';
    my $need_more = 0;

    while (1) {
        my $len = sysread $fh, my $buf, $blksize;
        if (!defined $len) {
            if (!is_transient_error()) {
                # A hard error will never clear; treat it as a closed peer
                # instead of polling forever.
                print STDERR "$client_port $peer read error: $!\n";
                return ($message, 1);
            }
            #print STDERR "$client_port Nothing to read from $peer: $!\n";
            sleep 0.1;
            last if length $message and !$need_more;
            next;
        }
        return ($message, 1) if $len == 0;

        $message .= $buf;
        # When both headers are present the Content-Length verdict, checked
        # last, decides.
        for my $check (framing_checks(\$message)) {
            my ($kind, $complete) = @$check;
            $need_more = $complete ? 0 : 1;
            log_framing($client_port, $peer, $kind, $complete);
        }
    }
    return ($message, 0);
}

# Write all of $data to the non-blocking handle $fh, handling partial
# writes and waiting out "would block" conditions. Returns true on success;
# on a permanent error returns false with $! set by the failed syswrite.
sub write_all {
    my ($fh, $data) = @_;
    my $remaining = length $data;
    my $offset    = 0;
    while ($remaining) {
        my $written = syswrite $fh, $data, $remaining, $offset;
        if (!defined $written) {
            return 0 unless is_transient_error();
            sleep 0.1;
            next;
        }
        $remaining -= $written;
        $offset    += $written;
    }
    return 1;
}

# Resolve $host and open a non-blocking TCP connection to $host:$port.
# Returns the connected handle, or nothing after logging the reason when
# the name cannot be resolved or the connection is refused.
sub connect_to_server {
    my ($host, $port, $client_port) = @_;
    print STDERR "$client_port Creating connection to server\n";

    # Look up the name we're connecting to. In scalar context gethostbyname
    # returns the packed address, or undef when the lookup fails.
    my $iaddr = gethostbyname($host);
    if (!defined $iaddr) {
        print STDERR "$client_port gethostbyname error: $host\n";
        print STDERR "$client_port hostname lookup failed\n";
        return;
    }

    # Create the TCP connection to the server.
    my $proto = getprotobyname('tcp');
    socket(my $server, PF_INET, SOCK_STREAM, $proto) or die("socket: $!");
    if (!connect($server, sockaddr_in($port, $iaddr))) {
        print STDERR "$client_port connect to $host:$port failed: $!\n";
        close $server;
        return;
    }

    # Make server connection non-blocking.
    set_nonblocking($server, 'SERVER');
    return $server;
}

# Child-side work for one accepted browser connection: relay a single
# request/response pair and dump both to stdout. $addr is the packed peer
# address returned by accept().
sub handle_connection {
    my ($browser, $addr) = @_;
    print STDERR "New child. Len of addr is " . length($addr) . "\n";

    my ($client_port, $client_ip) = sockaddr_in($addr);
    print STDERR "$client_port new browser connection from "
        . inet_ntoa($client_ip) . "\n";

    # Make browser connection non-blocking.
    set_nonblocking($browser, 'BROWSER');

    print STDERR "$client_port About to read from browser...\n";
    my ($request, $browser_closed) = read_message($browser, $client_port, 'BROWSER');
    if ($browser_closed) {
        print STDERR "$client_port BROWSER closed connection, quit this thread\n";
        close $browser;
        return;
    }
    print STDERR "$client_port Done reading from BROWSER.\n";
    printf "\n$client_port >>>>>>>> %10.4f\n", time();
    print $request;

    #######################################################################
    # Transform request from proxy format to normal format by removing the
    # http://hostname:port part from the request line. Anything that is not
    # an absolute http:// request (CONNECT, for instance) is rejected
    # rather than forwarded to a host taken from stale match variables.
    if ($request !~ s{^([A-Z]+\s+)http://([.\w-]+)(?::*(\d*))}{$1}i) {
        print STDERR "$client_port Not an http:// proxy request, giving up\n";
        close $browser;
        return;
    }
    my ($host, $port) = ($2, $3 || 80);

    #######################################################################
    # Connect and send the request. A "Bad file descriptor" on the first
    # attempt earns exactly one fresh connection; any other permanent write
    # error is fatal for this child.
    my $server;
    ATTEMPT: for my $attempt (1 .. 2) {
        $server = connect_to_server($host, $port, $client_port);
        if (!$server) {
            close $browser;
            return;
        }
        print STDERR "$client_port About to write request to SERVER.\n";
        last ATTEMPT if write_all($server, $request);
        die "SERVER write error: $!\n" unless $!{EBADF} and $attempt < 2;
        print STDERR "$client_port $!\nGetting new server connection\n";
        close $server;
    }

    #######################################################################
    # Now we reverse the whole process, reading from server, and writing to
    # browser.
    #######################################################################
    print STDERR "$client_port Finished writing request to SERVER, about to read reply from SERVER.\n";
    print STDERR "$client_port About to read from SERVER...\n";
    my ($reply, $server_closed) = read_message($server, $client_port, 'SERVER');
    if ($server_closed) {
        print STDERR "$client_port SERVER closed connection, nothing more to read\n";
    }
    print STDERR "$client_port Done reading from SERVER.\n";
    printf "\n$client_port <<<<<<<< %10.4f\n", time();
    print $reply;

    #######################################################################
    if (write_all($browser, $reply)) {
        print STDERR "$client_port Done writing result back to browser. Child exiting.\n";
    }
    else {
        print STDERR "$client_port BROWSER write error: $!\n";
    }
    close $browser;
    close $server;
    return;
}

# Exited children are reaped automatically; the parent never waits for them.
$SIG{CHLD} = 'IGNORE';
$| = 1;    # unbuffered stdout, so dumps from different children are written promptly

socket(my $listener, AF_INET, SOCK_STREAM, 0)       or die "socket: $!";
setsockopt($listener, SOL_SOCKET, SO_REUSEADDR, 1)  or die "setsockopt: $!";
# sockaddr_in builds the platform's real sockaddr layout.
bind($listener, sockaddr_in($proxy_port, $proxy_ip)) or die "bind: $!";
listen($listener, 5)                                or die "listen: $!";
print STDERR "$0 listening at $proxy_port. Clear your cache before surfing.\n";

while (1) {
    my $addr = accept(my $browser, $listener);
    if (!$addr) {
        next if $!{EINTR};    # interrupted by a signal, just accept again
        die "accept() error: $!\n";
    }

    my $pid = fork;
    die "fork: $!" unless defined $pid;
    if ($pid) {
        # parent
        close $browser;       # No use to forking parent.
        next;
    }

    # child
    close $listener;          # Server socket is of no use in child.
    handle_connection($browser, $addr);
    exit;                     # child exits
}

__END__

About Sprocket

Sprocket is a web proxy server which dumps a record of the traffic flowing
through it. A record of web traffic is useful for:

  * creating web load tests
  * creating monitors for web sites
  * figuring out exactly what a browser is doing

Being written in perl and not requiring any special libraries, sprocket runs
without needing compilation or any changes on Unix, Macintosh, or Cygwin on
Windows.

Using Sprocket To Record

To use sprocket, just call it from the command line and tell your browser to
use it as its proxy server:

    % sprocket > out.http
    /home/patrick/bin/sprocket listening at 8080. Clear your cache before surfing.

On Firefox, for example, you would click
Tools -> Options... -> Network -> Settings -> Manual proxy configuration:
Then enter the name of the machine sprocket is running on, and the port. The
typical settings would be localhost port 8080. Then surf the pages you're
interested in, and a transcript of your http traffic will be saved in
out.http.

sprocket continuously outputs status and diagnostic messages to stderr. If you
don't want to see those, you can start sprocket with stderr redirected to
/dev/null like this instead:

    % sprocket > out.http 2>/dev/null

Output Format

Sprocket delimits requests and responses with lines of the form:

    2837 >>>>>>>> 1199832276.5563

and

    2837 <<<<<<<< 1199832277.0243

where the first number is the local browser port, corresponding to a thread of
execution in the browser, and the second number is a unix timestamp, showing
when the request or response was sent.

So an example transcript might look like this:

    2853 >>>>>>>> 1199832944.7535
    GET http://patrick.net/ HTTP/1.1^M
    Accept-Encoding: gzip, deflate^M
    Host: patrick.net^M
    User-Agent: Mozilla/5.0 (Macintosh; U; en-US; rv:1.8.1.10) Gecko/20071115 Firefox/2.0.0.10^M
    Accept: text/xml,application/xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5^M
    Accept-Language: en-us^M
    ---------------: ------------^M
    Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7^M
    Keep-Alive: 300^M
    Proxy-Connection: keep-alive^M
    ^M

    2853 <<<<<<<< 1199832945.1593
    HTTP/1.1 301 Moved Permanently^M
    Transfer-Encoding: chunked^M
    Date: Tue, 08 Jan 2008 22:55:38 GMT^M
    Server: Apache/2.0.54 (Unix) mod_ssl/2.0.54 OpenSSL/0.9.7l PHP/4.4.0 mod_gzip/2.0.26.1a^M
    X-Powered-By: PHP/4.4.0^M
    Location: http://patrick.net/housing/crash.html^M
    --------------: ---^M
    Content-Type: text/html^M
    ^M
    6F^M
    Please use http://patrick.net/housing/crash.html instead
    ^M
    0^M
    ^M

Note that responses can come out of order, but it is possible to match up
requests and responses by port number.

Options

To use a different proxy port, start with the -p option, with the port number
attached directly to the option:

    % sprocket -p8081 > out.http

To bind to a specific IP address on a machine with multiple interfaces, use
the -i option, with the address attached directly to the option:

    % sprocket -i10.32.0.106 > out.http

Replaying Traffic

To replay http traffic, first trim out most of the responses with the trim
command, then use the replay command. See the source code of those commands
for documentation.