#!/usr/bin/env python

#
# Read bogofilter logs from stdin and print some per-day statistics
#

import sys, time

def read_line(str):
    """Parse one log line and return the useful pieces of it.

    The line is split on whitespace; only fields 0-2, 6 and 7 are inspected,
    so a line is expected to look like::

        Mar  5 12:34:56 <f3> <f4> <f5> Yes, spamicity=1.000000, ...

    Arguments:
      str -- the log line (the name shadows the builtin; it is kept so that
             existing callers are unaffected)

    Returns a tuple (date, is_spam, bogosity):
      date     -- time.struct_time built from the first three fields
                  (month name, day, HH:MM:SS); the line carries no year, so
                  the year is whatever time.strptime defaults to
      is_spam  -- True when the seventh field is exactly 'Yes,'
      bogosity -- the number following '=' in the eighth field, as a float

    Raises IndexError when the line has fewer than eight fields or the eighth
    has no '=', and ValueError when the date or the number cannot be parsed.
    """
    fields   = str.split()

    strdate  = "%s %s %s" % (fields[0], fields[1], fields[2])
    date     = time.strptime(strdate, "%b %d %H:%M:%S")
    is_spam  = fields[6] == 'Yes,'

    # The field looks like "spamicity=1.000000,".  Strip only a trailing
    # comma: blindly chopping the last character would eat a real digit
    # whenever the value is the last token on the line (no comma after it).
    bogosity = float(fields[7].split('=')[1].rstrip(','))

    return (date, is_spam, bogosity)

if __name__ == '__main__':
    # Script entry point: read log lines on stdin, accumulate per-day
    # counters, then print one summary row per day.
    #
    # Only constructs that behave identically on Python 2 and Python 3 are
    # used here ("in" instead of dict.has_key, sorted() instead of sorting
    # the keys() list in place, print with one parenthesised argument).

    # Maps "MMDD" -> {'total': lines seen, 'spam': lines flagged as spam,
    #                 'bogo': sum of the spamicity values}.
    stats = {}

    # Stream stdin line by line instead of loading the whole log in memory.
    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue

        (date, is_spam, bogosity) = read_line(line)
        index = time.strftime("%m%d", date)

        if index not in stats:
            stats[index] = {'total': 0, 'bogo': 0, 'spam': 0}

        day = stats[index]
        day['total'] += 1
        day['bogo'] += bogosity

        if is_spam:
            day['spam'] += 1

    # "MMDD" strings sort chronologically within a calendar year.
    keys = sorted(stats)

    if keys:
        print("Day: Mail Total (Spam - Part) Average spamicity")

    for d in keys:
        date     = time.strptime(d, "%m%d")
        strdate  = time.strftime("%d %b", date)
        total    = stats[d]['total']
        spam     = stats[d]['spam']
        mail     = total - spam
        # total is at least 1 for every key, so these divisions are safe.
        part     = float(spam) / float(total) * 100.0
        avg_bogo = stats[d]['bogo'] / total

        print("%s: %3d %4d (%4d - %2d%%) %1.6f" %
              (strdate, mail, total, spam, part, avg_bogo))
