#!/usr/bin/perl
# Replay a transcript of http traffic, flipping cookies as needed.
# Copyright 2002-2008 Patrick Killelea under the GPL.
# You may freely copy and redistribute this file.
# Version: Thu Sep 11 14:52:38 PDT 2008
# Please write p@patrick.net with improvements.
# Documentation below.
#
# NOTE: this script intentionally uses package globals and no "use strict".
# "perl:" lines found in the transcript are string-eval'ed below in this same
# scope, so they may read or set any of these variables.

use bytes;
use Fcntl;
use Socket;
use Time::HiRes 'time','sleep';

# ---------------------------------------------------------------------------
# Command line options.
# ---------------------------------------------------------------------------
while ($_ = $ARGV[0], /^-/) {
    shift;
    last if /^--$/;

    # If there is more than one local interface, we can bind to a particular IP.
    # Don't put any space between -b and the IP addr.
    if (/^-b([\d\.]+)/) { $bind_addr = $1; }

    # Accumulate with ||= so that flags given as separate arguments
    # ("-d -v") do not switch each other off again.
    $d ||= /d/;    # print debugging output to STDERR
    $r ||= /r/;    # redo request until expected answer obtained
    $s ||= /s/;    # silent, don't output timestamp, latency, bytes, url line
    $t ||= /t/;    # tolerate errors, just move on
    $v ||= /v/;    # print verbose transcript of what we did
}

# sigalarm handler to quit after alarm() timeout, to kill hung scripts.
# It has to be installed before the request loop runs; a handler assigned
# after the loop would never be in effect while a request is in flight.
$SIG{'ALRM'} = sub {
    syswrite(STDERR, "Caught SIGALRM in replay\n");
    # Report the time spent on the request that hung, not a stale latency.
    output($start, time() - $start, 0, $url, 'timeout');
    die "timed out" unless $t;
};

# Read whole file in single gulp.
undef $/;
$_ = <>;
$| = 1;

@requests = split(/.*>>>>>>>>.*\n/);

# shift to ignore everything before 1st request marker.
shift @requests;

# ---------------------------------------------------------------------------
# Main loop: one iteration per request in the transcript.
# ---------------------------------------------------------------------------
for ($i = 0; $_ = $requests[$i]; $i++) {

    ($request, $expected) = split(/\n.*<<<<<<<<.*\n/);

    # Split request into parts.
    ($perl, $firstline, $headers, $body) =
        $request =~ /(perl:.*?\n)?(.*?\r\n)(.*?\r\n\r\n)(.*)/s;

    # SECURITY NOTE(review): this executes arbitrary Perl taken from the
    # transcript file. Only replay transcripts you trust.
    eval $perl;

    $redo_count = 0;    # Counts redos of this request; see the limit below.

    # Pick out the parts.
    $firstline =~ m|([A-Z]+)\s+(http(s*)://([\.\w-]+)(:*(\d*))(/*.*?))\s+(.*\n)|
        || die "bad request: $firstline\n";

    # Copy the captures before anything else can disturb them.
    ($cmd, $url, $ssl, $new_host, $new_port, $path, $version) =
        ($1, $2, $3 || 0, $4, $6 || 80, $7, $8);

    if ($new_host ne $host or $new_port != $port) {
        # If the request is to a new host (or port), close any previous
        # connection so that httpcat opens a fresh one.
        $server_connected = 0;
        close SERVER;
    }
    $host = $new_host;
    $port = $new_port;

    $normal = "$cmd $path $version";

    # Remove any old cookies left in a transcript. They are no longer valid.
    $headers =~ s/Cookie: .*\n//g;

    if (keys(%cookiejar)) {    # if we have cookies, add them onto headers
        chop $headers;         # remove last \n from \r\n\r\n
        chop $headers;         # remove last \r from \r\n\r
        $headers .= "Cookie: "
                  . join("; ", map { "$_=$cookiejar{$_}" } keys(%cookiejar))
                  . "\r\n\r\n";    # end the request correctly
    }

  REDO:
    # v prints out in sprocket format so result can be played back yet again
    $v && print "\n>>>>>>>>\n$firstline$headers$body";

    alarm(600);    # Don't let anything hang for more than 10 minutes.

    $start = time();
    $_     = httpcat($host, $port, $normal.$headers.$body);    ####### Action!
    $diff  = time() - $start;
    $len   = length();

    # Get the cookies the server gave us. The value ends at the first ';'
    # or at the end of the header line, so cookies without attributes are
    # picked up too.
    while (/Set-Cookie: (.*?)=([^;\r\n]*)/gi) {
        $cookiejar{$1} = $2;
    }

    $v && print "\n<<<<<<<<\n$_";

    # Look for proof strings in response.
    $response = $_;

    # Start out assuming that the answer is ok. This is set once per request
    # so that one failed proof is not hidden by a later proof that matches.
    $errmsg = "ok";

    foreach (split(/\n/, $expected)) {
        next if /^$/;
        if (/^perl:(.*)/) {
            eval $1;    # same trust caveat as the request-side eval above
            next;
        }

        # compare server response to expected, all lowercased
        if (index(lc($response), lc($_)) == -1) {    # -1 is failure, no match
            $errmsg = "failed";

            # if we're in this block, we failed to get at least one of the proof strings
            if ($r) {
                $d && print STDERR "Wrong answer. Redo...\n";
                if ($redo_count++ < 100) {
                    goto REDO;
                }
                else {
                    $d && print STDERR "Too many redos. Quitting.\n";
                    die;
                }
            }

            if ($t) {
                $d && print STDERR "Wrong answer. Don't worry about it.\n";
                next;
            }

            $mess  = "\nFailed to get the following proof at input line $. :\n\n$_\n\n";
            $mess .= "Here are the request and response:\n\n$normal$headers$body\n\n$response\n\n";
            die $mess;
        }
    }

    output($start, $diff, $len, $url, $errmsg);
}

close SERVER;

# Print one result line: start time (yyyy mm dd hh mm ss.sss), latency in
# seconds, byte count, url and status. Prints nothing when -s was given.
sub output {
    my ($start, $diff, $len, $url, $errmsg) = @_;

    return if $s;    # Be silent.

    my $fraction = $start - int($start);
    my @ary      = localtime($start);

    printf "%4.4d %2.2d %2.2d %2.2d %2.2d %6.3f %3.3f %7d %s %s\n",
        1900 + $ary[5], $ary[4] + 1, $ary[3], $ary[2], $ary[1],
        $ary[0] + $fraction, $diff, $len, $url, $errmsg;
}

# Send $req to $host:$port over the SERVER socket and return the raw response
# (headers and body) as one string. The connection is kept open and reused
# between calls unless the server closes it or the request is HTTP/1.0.
# Reads the globals $bind_addr, $version and $d; maintains $server_connected
# and $need_more; uses $_ as the response buffer.
sub httpcat {
    my ($host, $port, $req) = @_;

  SERVER_CONNECTION:
    if (!$server_connected) {
        # If we have no connection to the server, create it.
        $d && print STDERR "Creating new connection to server\n";

        # Look up the name we're connecting to. gethostbyname reports
        # failure by returning undef; it does not set $?.
        my $iaddr = gethostbyname($host);
        if (!defined $iaddr) {
            print STDERR "gethostbyname error: $host\n";
            print STDERR "hostname lookup failed\n";
            exit 1;
        }

        # Create the TCP connection to the server, retrying once a second
        # until it succeeds. Every attempt gets a fresh socket, and the
        # optional local bind is applied to each fresh socket.
        my $proto = getprotobyname('tcp');
        my $paddr = sockaddr_in($port, $iaddr);
        while (1) {
            socket(SERVER, PF_INET, SOCK_STREAM, $proto) or die("socket: $!");

            # Bind to a specific load IP addr.
            if ($bind_addr) {
                bind(SERVER, sockaddr_in(0, inet_aton("$bind_addr"))) or die "bind: $!";
            }

            last if connect(SERVER, $paddr);

            print "connect failed: $!\n";
            print "request was: $req\n";
            close SERVER;
            sleep 1;    # Try again.
        }

        # Make server connection non-blocking. F_GETFL hands back the
        # current flags as fcntl's return value.
        my $flags = fcntl(SERVER, F_GETFL, 0)
            or die "Couldn't get flags for SERVER: $!\n";
        fcntl(SERVER, F_SETFL, $flags | O_NONBLOCK)
            or die "Couldn't set flags for SERVER: $!\n";

        $server_connected = 1;
    }
    else {
        $d && print STDERR "Using old connection to server\n";
    }

    #######################################################################
    $d && print STDERR time() . " About to write request to SERVER.\n";

    my $remaining = length($req);
    my $offset    = 0;
    while ($remaining) {
        my $written = syswrite SERVER, $req, $remaining, $offset;

        # $! is only meaningful when the call actually failed.
        if (!defined $written) {
            if ($!{EBADF}) {
                $d && print STDERR time() . " $!\nGetting new server connection\n";
                $server_connected = 0;
                goto SERVER_CONNECTION;
            }
            if ($!{EAGAIN} || $!{EWOULDBLOCK} || $!{EINTR}) {
                # Non-blocking socket is not ready for more data yet.
                sleep 0.001;
                next;
            }
            die "SERVER write error: $!\n";
        }

        $remaining -= $written;
        $offset    += $written;
    }

    #######################################################################
    # Now we read from server, and writing to stdout.
    #######################################################################
    $d && print STDERR time() . " Finished writing request to SERVER, about to read reply from SERVER.\n";

    my $blksize = (stat SERVER)[11] || 16384;
    my $got     = -1;
    my $buf;
    $_ = '';

    # If replaying an HTTP/1.0 request, default case is to need more data, until
    # server closes connection. Or we get Content-Length bytes and we quit.
    $need_more = 1 if ($version =~ m|HTTP/1.0|);
    $d && print STDERR time() . " version is $version and need_more is $need_more\n";

    # Keep reading while nothing has arrived yet, while the last read
    # returned data, or while the framing checks below say more is due.
    while (length($_) == 0 or defined $got or $need_more) {
        $got = sysread SERVER, $buf, $blksize;

        if (!defined $got) {
            # Nothing to read right now; the loop condition decides
            # whether to keep waiting.
            sleep 0.001;
            next;
        }

        if ($got == 0) {
            $d && print STDERR time() . " SERVER closed connection, nothing more to read\n";
            $server_connected = 0;
            last;
        }

        $_ .= $buf;

        if (/Transfer-Encoding: chunked/i) {
            # For chunked encoding, we're not done reading unless we have \n0\r\n at the end.
            if (/\r\n0\r\n\r\n\Z/m) {
                $need_more = 0;
                $d && print STDERR time() . " Have chunked encoding, reached the end!\n";
            }
            else {
                $need_more = 1;
                $d && print STDERR time() . " Have chunked encoding, still need more...\n";
            }
        }

        if (/Content-Length: (\d+)/i) {
            # For transmissions with a Content-Length, we're not done reading until we have that many bytes.
            # RE's match at most about 32K, so we have to check len by len of header + len of body
            my $content_length = $1;

            # Only measure the header once it has arrived completely;
            # otherwise $1 would still hold the Content-Length digits.
            if (/^(.*?\r\n\r\n)/s and length($_) - length($1) >= $content_length) {
                $need_more = 0;
                $d && print STDERR time() . " Have content length, got that many bytes!\n";
            }
            else {
                $need_more = 1;
                $d && print STDERR time() . " Have content length, need more bytes...\n";
            }
        }
    }

    $need_more = 0;

    if ($version =~ m|HTTP/1.0|) {
        close(SERVER);
        $server_connected = 0;
    }

    $d && print STDERR time() . " Done reading from SERVER.\n";

    return($_);
}

__END__

About Replay

    Replay plays back http traffic. The usual sequence of events is to
    capture traffic with sprocket, trim out irrelevant material with trim,
    and then replay with replay, like this:

        % sprocket > out.http
        % trim out.http > trimmed.http
        % replay trimmed.http

Input Format

    Replay expects a file of http traffic as input, with requests preceded
    by a single line containing:

        >>>>>>>>

    Each request must be followed by a single line containing the analogous

        <<<<<<<<

    and one or more "proof" lines which are searched for in the server's
    response. By default, replay will exit if any of the proof strings are
    not found in the server's response.

Output Format

    Upon successfully replaying a request and getting all the expected proof
    lines, replay will print an output line giving the start time of the
    request, the latency until the answer was received, the number of bytes
    received, the URL of the request, and the status:

        yyyy mm dd hh mm ss.sss latency bytes url status

    For example:

        2008 01 08 15 08 20.876 0.227 486 http://patrick.net/ ok

Options

    Replay accepts the following options:

    -d  to print debugging output to STDERR. By default, there is no
        debugging output.

    -r  to keep redoing a request until the expected proof is obtained.
        To force a loop at a certain point, you can run replay -r with a
        script that contains a bogus proof string.

    -s  to be completely silent and output nothing.

    -t  to continue even if proof lines were not found in server output.

    -v  to print a complete transcript of the http traffic being generated
        and received by replay. This traffic can itself be trimmed and
        replayed again.

Creating A Load Test

    Traffic can be replayed many times in parallel to create a load test.
    For example, we can fork off 10 copies of a script to generate a spike
    of 10 nearly simultaneous requests:

        #!/bin/bash -x
        ((user = 0))
        while ((user < 10))
        do
            replay trimmed.http > out$user.txt &
            ((user = user + 1))
        done

    This approach can be arbitrarily expanded to test in different ways,
    such as a slow ramp up in users, or a long soak test. Different users
    can be inserted into the trimmed test by running a replacement command
    on trimmed.http between each iteration, such as:

        perl -pi -e "s/user=user\d/user=user$user/" trimmed.http

Creating A Monitor

    By replaying known traffic at regular intervals with a cron job, you
    can create a monitor of web performance. For example:

        # MIN   HOUR   DOM    MOY    DOW   Commands
        #(0-59) (0-23) (1-31) (1-12) (0-6) (Note: 0=Sun)
        */10 * * * * cd /home/patrick; bin/replay homepage.http > homepage.out

    The free gnuplot tool can be used to plot the latencies in this file
    with a gnuplot config file like this:

        set term png
        set xdata time
        set timefmt "%Y %m %d %H %M"
        set ylabel "seconds"
        set title "homepage latency"
        set output "homepage.png"
        plot 'homepage.out' using 1:7 notitle with lines

Spying On Your Browser

    You can run sprocket and grep to look for expected keywords in web
    traffic. For example, to see every GET request and nothing else:

        % sprocket 2>/dev/null | grep ^GET

    It's also instructive to just leave sprocket running for a while and
    note that your browser may be making requests you did not know about,
    for example update requests to Google or Microsoft.