# Example files for the title: 9780596510046

[![9780596510046](https://resources.oreilly.com/uploads/system/group/avatar/3/tarsier_hi_elbows.png)](https://www.safaribooksonline.com/)
The following applies to example files from material published by O’Reilly Media, Inc. Content from other publishers may include different rules of usage. Please refer to any additional usage rights explained in the actual example files or refer to the publisher’s website.
O'Reilly books are here to help you get your job done. In general, you may use the code in O'Reilly books in your programs and documentation. You do not need to contact us for permission unless you're reproducing a significant portion of the code. For example, writing a program that uses several chunks of code from our books does not require permission. Answering a question by citing our books and quoting example code does not require permission. On the other hand, selling or distributing a CD-ROM of examples from O'Reilly books does require permission. Incorporating a significant amount of example code from our books into your product's documentation does require permission.
We appreciate, but do not require, attribution. An attribution usually includes the title, author, publisher, and ISBN.
If you think your use of code examples falls outside fair use or the permission given here, feel free to contact us at <permissions@oreilly.com>.
Please note that the examples are not production code and have not been carefully tested. They are provided "as-is" and come with no warranty of any kind.
# Load the whole data file into one Hash, mapping each client (field 0)
# to a list of [time, article] pairs.
class BigHash

  def initialize(file)
    @hash = {}
    lines = 0
    File.open(file).each_line do |line|
      s = line.split
      article = s[2].intern
      if @hash[s[0]]
        @hash[s[0]] << [ s[1], article ]
      else
        @hash[s[0]] = [ [ s[1], article ] ]  # start a list of pairs, matching BinarySearch's @values
      end
      lines += 1
      STDERR.puts "Line: #{lines}" if (lines % 100000) == 0
    end
  end

  def find(key)
    @hash[key]
  end
end
# Load the same data into two parallel arrays: @keys holds the distinct
# clients (the input file must already be sorted by client) and @values holds
# each client's list of [time, article] pairs. Lookup is a binary search over
# @keys. The optional size argument pre-sizes the arrays and should match the
# number of distinct clients in the file.
class BinarySearch

  attr_reader :keys, :values

  def initialize(file, size = 0)
    last = ''
    @keys = Array.new(size)
    @values = Array.new(size)
    lines = 0
    hosts = -1
    File.open(file).each_line do |line|
      s = line.split
      article = s[2].intern
      if s[0] != last
        hosts += 1
        @keys[hosts] = s[0]
        @values[hosts] = []
        last = s[0]
      end
      @values[hosts] << [ s[1], article ]
      lines += 1
      STDERR.puts "Line: #{lines}" if (lines % 100000) == 0
    end
  end

  def find(target)
    low = -1
    high = @keys.size
    while (high - low) > 1 do
      probe = (high + low) / 2
      if (@keys[probe] > target)
        high = probe
      else
        low = probe
      end
    end
    if low == -1 || @keys[low] != target
      return nil
    else
      return @values[low]
    end
  end
end
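# --------------------------------------------------------------------------
# Usage sketch (illustrative addition, not one of the original example files).
# It assumes the two classes above are saved as big-hash.rb and binary-search.rb
# (the names implied by the require lines in the timing harness below) and that
# the data file holds whitespace-separated "client time article" lines, sorted
# by client, as produced by the log-parsing script further down.

require_relative 'big-hash'       # assumed file name
require_relative 'binary-search'  # assumed file name

# Build a tiny, made-up data file: three fetches by two clients.
File.open('tiny-data.txt', 'w') do |f|
  f.puts '1.2.3.4 1165762803 2006/10/08/Grief-Lessons'
  f.puts '1.2.3.4 1165762900 2006/10/08/Grief-Lessons'
  f.puts '5.6.7.8 1165763000 2006/10/08/Grief-Lessons'
end

h = BigHash.new('tiny-data.txt')
b = BinarySearch.new('tiny-data.txt', 2)  # 2 distinct clients

p h.find('1.2.3.4')   # list of [time, article] pairs for that client
p b.find('5.6.7.8')   # same kind of answer, via binary search
p b.find('9.9.9.9')   # nil: client not in the data
# --------------------------------------------------------------------------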
counts = {}
counts.default = 0
ARGF.each_line do |line|
  if line =~ %r{GET /ongoing/When/\d\d\dx/(\d\d\d\d/\d\d/\d\d/[^ .]+) }
    counts[$1] += 1
  end
end
unique = {}
lines = 0
last = ''
ARGF.each_line do |line|
  lines += 1
  puts "Line: #{lines}, unique #{unique.size}" if (lines % 100000) == 0
  line.gsub!(/ .*$/, '')
  next if line == last
  unique[line] = 1
  last = line
end
puts "Unique fetchers: #{unique.size}"
ARGF.each_line do |line|
  if line =~ %r{GET /ongoing/When/\d\d\dx/(\d\d\d\d/\d\d/\d\d/[^ .]+) }
    puts $1
  end
end
ARGF.each_line do |line|
  if line =~ %r{GET /ongoing/When/\d\d\dx/\d\d\d\d/\d\d/\d\d/[^ .]+ }
    puts line
  end
end
require 'big-hash'
require 'binary-search'

# args are data file, test queries, and class-name to use
data_file = ARGV[0]
query_file = ARGV[1]
class_name = ARGV[2]
array_size = ARGV[3].to_i

t0 = Process::times
puts "#{t0.inspect}"

if class_name == 'Hash'
  worker = BigHash.new(data_file)
elsif class_name == 'Binary'
  worker = BinarySearch.new(data_file, array_size)
end

queries = 0
t1 = Process::times
puts "#{t1.inspect}"

File.open(query_file).each_line do |line|
  line.chomp!
  y = worker.find(line)
  puts "ERROR, #{line} not found" unless y
  n = worker.find(line.reverse)
  puts "Oddly, #{line.reverse} was found" if n
  queries += 1
end

t2 = Process::times
puts "#{t2.inspect}"
puts "Queries #{queries}, load time #{t1.utime - t0.utime}, " +
     "run time #{t2.utime - t1.utime}"
require 'time'

unique = {}
lines = 0
last = ''
# 81.27.128.143 - - [10/Dec/2006:07:00:03 -0800] "GET /ongoing/ongoing.atom HTTP/1.1" 304 - "-" "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1) Gecko/20061010 Firefox/2.0"
ARGF.each_line do |line|
  if line =~ %r{^(\S+) - - \[(..)/(...)/(....):(..):(..):(..) .*GET /ongoing/When/\d\d\dx/(\d\d\d\d/\d\d/\d\d/[^ .]+) }
    who = $1
    t = Time.local($4, $3, $2, $5, $6, $7)
    puts "#{who} #{t.to_i} #{$8}"
    lines += 1
    STDERR.puts "Line: #{lines} @ #{t}" if (lines % 100000) == 0
  end
end
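# Quick illustration (an added sketch, not an original example file) of what
# the regexp above captures from one matching log line; the sample line is
# borrowed from the comment in the report script below.
sample = 'c80-216-32-218.cm-upc.chello.se - - [08/Oct/2006:06:37:48 -0700] ' +
         '"GET /ongoing/When/200x/2006/10/08/Grief-Lessons HTTP/1.1" 200 5945'
if sample =~ %r{^(\S+) - - \[(..)/(...)/(....):(..):(..):(..) .*GET /ongoing/When/\d\d\dx/(\d\d\d\d/\d\d/\d\d/[^ .]+) }
  t = Time.local($4, $3, $2, $5, $6, $7)
  puts "#{$1} #{t.to_i} #{$8}"  # client, epoch seconds, article path
end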
"GET /ongoing/When/\d\d\dx/\d\d\d\d/\d\d/\d\d/[^ .]+ "
lines = 0
interval = (ARGV[0] || 1000).to_i
STDIN.each_line do |line|
  lines += 1
  puts line if (lines % interval) == 0
end
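# Hypothetical invocation (names illustrative): print every 1000th line of a log
#   ruby sample-lines.rb 1000 < access_log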
use strict;

my %counts = ();
while (<STDIN>) {
    if (/GET \/ongoing\/When\/\d\d\dx\/(\d\d\d\d\/\d\d\/\d\d\/[^ .]+) /) {
        $counts{$1}++;
    }
}

my @k = keys(%counts);
@k = sort { $counts{$b} <=> $counts{$a} } @k;

foreach my $i (0 .. 9) {
    print "$counts{$k[$i]}: $k[$i]\n";
}
# line looks like:
# c80-216-32-218.cm-upc.chello.se - - [08/Oct/2006:06:37:48 -0700] "GET /ongoing/When/200x/2006/10/08/Grief-Lessons HTTP/1.1" 200 5945 "http://www.tbray.org/ongoing/" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)"

counts = {}
counts.default = 0
ARGF.each_line do |line|
  if line =~ %r{GET /ongoing/When/\d\d\dx/(\d\d\d\d/\d\d/\d\d/[^ .]+) }
    counts[$1] += 1
  end
end

keys_by_count = counts.keys.sort { |a, b| counts[b] <=> counts[a] }
keys_by_count[0 .. 9].each do |key|
  puts "#{counts[key]}: #{key}"
end
def count_one(stream, counts)
  stream.each do |line|
    # for example: From: Tim Bray <Tim.Bray@Sun.COM>
    if line =~ /^From: .*<([^>]*)>\n$/
      who = $1
      if counts[who]
        counts[who] += 1
      else
        counts[who] = 1
      end
    end
  end
end
counts = {}
if ARGV.length == 0
  count_one(STDIN, counts)
else
  ARGV.each do |filename|
    File.open(filename) { |file| count_one(file, counts) }
  end
end
descending = counts.keys.sort { |a, b| counts[a] <=> counts[b] }
descending.reverse_each { |from| puts "From: #{from}: #{counts[from]}" }
counts = {}
counts.default = 0
ARGF.grep(/^From: .*<([^>]*)>\n$/) { counts[$1] += 1 }
descending = counts.keys.sort { |a, b| counts[b] <=> counts[a] }
descending.each { |from| puts "From: #{from}: #{counts[from]}" }
counts = {}
counts.default = 0
linecount = 0
ARGF.each do |line|
  linecount += 1
  STDERR.puts "line: #{linecount}" if linecount % 100000 == 0
  counts[$1] += 1 if line =~ /^From: .*<([^>]*)>\n$/
end
descending = counts.keys.sort { |a, b| counts[b] <=> counts[a] }
descending.each { |from| puts "From: #{from}: #{counts[from]}" }
counts = {}
counts.default = 0
$<.grep(/^From: .*<([^>]*)>\n$/) { counts[$1] += 1 }
descending = counts.keys.sort { |a, b| counts[b] <=> counts[a] }
descending.each { |from| puts "From: #{from}: #{counts[from]}" }
counts = {}
counts.default = 0
t1 = Process::times
ARGF.each_line do |line|
  if line =~ %r{GET /ongoing/When/\d\d\dx/(\d\d\d\d/\d\d/\d\d/[^ .]+) }
    counts[$1] += 1
  end
end
t2 = Process::times
keys_by_count = counts.keys.sort { |a, b| counts[b] <=> counts[a] }
keys_by_count[0 .. 9].each do |key|
  puts "#{counts[key]}: #{key}"
end
t3 = Process::times
step1 = t2.utime - t1.utime
step2 = t3.utime - t2.utime
printf "Step 1: %g, Step 2: %g\n", step1, step2