#!/bin/bash
# GNU All-Permissive License
# Copying and distribution of this file, with or without modification,
# are permitted in any medium without royalty provided the copyright
# notice and this notice are preserved. This file is offered as-is,
# without any warranty.
# Copyright 2009 Daniel Einspanjer - Mozilla Corporation
#
# Tabulates how often values occur in the input records.
#
# Options are plain awk variables:
#   kf   required  "|"-separated indexes of the key field(s) to group by
#   cf   optional  "|"-separated indexes of fields whose values are counted
#                  within each key; when omitted, records are simply counted
#                  per key combination ("simple" mode)
#   sf   optional  index of a field to sum for every counted value
#   avg  optional  "y" adds a mean column (sum / count); only used with sf
#   ekv  optional  regex; a record is skipped when any key value matches it
#   ecv  optional  regex; a counted value is skipped when it matches it
#
# Output: one line per (key values, counted value) pair, piped through
# "sort": padded names, the count, then the sum and mean when requested.
#
# NOTE(review): the shebang names bash, but the body is awk and the %10'd
# conversions rely on a printf flag that is not in POSIX awk -- presumably
# this is meant to be run as `gawk -f <this file> kf=... [file...]`; confirm.

# Setup: validate the options and split the field lists.
# NOTE(review): this runs on the first record rather than in BEGIN,
# presumably so that `kf=1`-style operand assignments (which awk applies
# only when it reaches them in the argument list) are visible; confirm.
NR == 1 {
    if (kf == "") {
        print "Arguments:" > "/dev/stderr";
        print " Required:" > "/dev/stderr";
        print " kf=\"keyFieldIndex(es)\" The key field(s) to tabulate as a | separated list" > "/dev/stderr";
        print " Optional:" > "/dev/stderr";
        print " cf=\"countFieldIndex(es)\" Aggregate each non-filtered value in these fields" > "/dev/stderr";
        print " sf=\"sumFieldIndex\" Sum this field for each non-filtered value in this list of count fields" > "/dev/stderr";
        print " avg=\"y\" Produce a mean average as well (requires sf)" > "/dev/stderr";
        print " ekv=\"excludeKeyValuesRegEx\" Exclude kf values matching this regex" > "/dev/stderr";
        print " ecv=\"excludeCountValuesRegEx\" Exclude cf values matching this regex" > "/dev/stderr";
        # `exit` still runs END; the flag lets END bail out with the same
        # failure status instead of reporting on an empty table.
        usageError = 1;
        exit 1;
    }

    numKeyFields = split(kf, keyFields, "|");

    if (cf == "") {
        # Simple mode: tally each record exactly once per key combination.
        # Counting against every key field (cf = kf) would tally a record
        # once per key field, giving duplicate rows or inflated counts as
        # soon as more than one key field is used. The first key field is
        # enough because its value is already determined by the key.
        simple = 1;
        cf = keyFields[1];
    }
    numCountFields = split(cf, countFields, "|");
}

# Per-record tally.
{
    # Lightweight progress indicator on stderr, overwritten in place.
    if (NR % 10000 == 0)
        printf(" %d \r", NR) > "/dev/stderr";

    #if ($NF == "") NF--;

    # Build the composite key; one excluded key value drops the whole record.
    isFiltered = 0;
    keyValues = "";
    for (i = 1; i <= numKeyFields; i++) {
        keyField = keyFields[i];
        if (ekv == "" || $keyField !~ ekv)
            keyValues = keyValues $keyField "\t";
        else
            isFiltered = 1;
    }
    if (isFiltered)
        next;

    for (countField in countFields) {
        countFieldValue = $countFields[countField];
        if (ecv == "" || countFieldValue !~ ecv) {
            occurances[keyValues, countFieldValue]++;
            if (sf != "")
                occuranceSums[keyValues, countFieldValue] += $sf;
        }
    }
}

# Report: pad the name columns to the widest entry and emit sorted rows.
END {
    if (usageError)
        exit 1;

    # First pass: find the column widths.
    for (entry in occurances) {
        split(entry, names, SUBSEP);
        if (length(names[1]) > maxName1) maxName1 = length(names[1]);
        if (length(names[2]) > maxName2) maxName2 = length(names[2]);
    }

    showSum = (sf != "");
    showAvg = (showSum && avg == "y");

    # Second pass: format each row and hand it to sort.
    for (entry in occurances) {
        split(entry, names, SUBSEP);

        if (simple == 1)
            row = sprintf("%-" maxName1 "s", names[1]);
        else
            row = sprintf("%-" maxName1 "s%-" maxName2 "s", names[1], names[2]);

        row = row sprintf("\t%10'd", occurances[entry]);
        if (showSum)
            row = row sprintf("\t%10'd", occuranceSums[entry]);
        if (showAvg)
            # The count is at least 1 for every stored entry, so the
            # division is always defined.
            row = row sprintf("\t%10.2f", occuranceSums[entry] / occurances[entry]);

        printf "%s\n", row | "sort";
    }

    # Flush the pipe and wait for sort to finish writing its output.
    close("sort");
}