# Shuffle the lines of log.txt:
#   1. awk prefixes every line with a random number and FS,
#   2. sort orders the lines numerically on that first field,
#   3. awk blanks the first field (the random number),
#   4. sed strips the spaces/tabs left at the start of each line.
awk 'BEGIN{srand()}{print rand() FS $0}' log.txt \
  | sort -n -k 1,1 \
  | awk '{$1="";print $0}' \
  | sed 's/^[ \t]\+//'
# First stage on its own: print every line of log.txt prefixed with a random
# number and the field separator FS.
awk 'BEGIN{srand()}{print rand() FS $0}' log.txt
This prefixes each line of log.txt with a random number and prints the
result to the terminal. There are two main parts to this very
short awk
program:
BEGIN {srand()}
: This block is executed only once, before awk starts processing the
lines of the input file. The call to srand() seeds the random
number generator with the current date and time. This causes the
multiple calls to the function rand() to return a different sequence
of random numbers each time the program is executed.
{print rand() FS $0}
: This block is executed
once for every line of the input file. It prints
a random number followed by FS (the field separator, by default a single space)
followed by the current line of the input text ($0).
sort -n -k 1,1
This sorts the output from the awk program numerically, using the random numbers we produced as the sorting key (field). The syntax -k 1,1 means "sort where the key starts at the first field and ends at the first field"; in effect, this is equivalent to "use only the first field to sort". Because each line of the input file was given a random number by the previous command, sorting these lines according to their numbers results in a new random ordering of the file's lines every time we run it.
awk '{$1="";print $0}' | sed 's/^[ \t]\+//'
This part cleans up the extra information we added
to each line, namely the random numbers: awk erases the first field
( awk '{$1="";print $0}'
), and sed then removes the one or more space or
tab characters left at the beginning of the line (
sed 's/^[ \t]\+//'
).
#!/usr/bin/env -S awk -f
#
# For every restaurant in the input, print its name followed by the distinct
# people recorded for it, in the form "restaurant: name1 name2 ".
#
# Input:  one record per line, fields separated by ';'. Field 2 is used as the
#         restaurant name and field 1 as the people of that visit.
#         NOTE(review): field 1 is presumably a comma-separated list of names
#         (the END block splits on ','); confirm against the data file.
# Output: one line per restaurant. The order of restaurants and of names is
#         whatever order awk's for-in loop yields.
#
# The shebang uses "env -S" so that "awk -f" is split into two arguments; on
# Linux a plain "#!/usr/bin/env awk -f" hands "awk -f" to env as one word.

# Runs once before any input is read: use ';' as the field separator.
BEGIN { FS = ";" }

# Runs for every input line: append the people of this visit ($1) to the
# comma-joined string kept for the restaurant ($2).
{
    restaurants[$2] = restaurants[$2] "," $1
}

# Runs once after all input has been processed.
END {
    for (k in restaurants) {
        printf("%s: ", k)

        # Break the accumulated string back into individual names.
        split(restaurants[k], names, ",")

        # De-duplicate: array keys are unique, so using each name as a key of
        # 'customer' collapses repeated names into one entry.
        for (l in names) {
            customer[names[l]] = 1
        }

        # The accumulated string always starts with ',', so split() yields an
        # empty first name; drop it.
        delete customer[""]

        for (c in customer) {
            printf("%s ", c)
        }
        printf("\n")

        # Reset the set before handling the next restaurant.
        delete customer
    }
}
problem3.awk
#!/usr/bin/env -S awk -f
#
# Print every distinct pair of people that appears together on an input line.
#
# Input:  ';'-separated records; field 1 is split on ',' into names.
# Output: one line per pair, "first second", printed the first time the pair is
#         seen in either order.
#
# The shebang uses "env -S" so that "awk -f" is split into two arguments; on
# Linux a plain "#!/usr/bin/env awk -f" hands "awk -f" to env as one word.

# Runs once before any input is read: use ';' as the field separator.
BEGIN { FS = ";" }

# Runs for every input line.
{
    n = split($1, table, ",")   # names of this line go into table[1..n]

    # Visit every pair (i, j) with i < j. For "Bruno,Hussam,Harsh,Atheendra"
    # that is: [Bruno,Hussam], [Bruno,Harsh], [Bruno,Atheendra],
    # [Hussam,Harsh], [Hussam,Atheendra], [Harsh,Atheendra].
    for (i = 1; i <= n; i++) {
        for (j = i + 1; j <= n; j++) {
            # 'friends' remembers the pairs already seen. The keys [a,b] and
            # [b,a] are different, so both orders are checked. 'in' is used so
            # that merely testing a pair does not create an array entry.
            if (!((table[i], table[j]) in friends) &&
                !((table[j], table[i]) in friends)) {
                # Separate the two names with a space so they stay readable.
                print table[i] " " table[j]
            }
            # Mark the pair so it is not printed again.
            friends[table[i], table[j]] = 1
        }
    }
}
problem4.awk
#!/usr/bin/env -S awk -f
#
# Print every pair of people whose pair counter reaches at least 3.
#
# Input:  ';'-separated records; field 1 is split on ',' into names.
# Output: one line per qualifying pair, "first second"; each unordered pair is
#         printed at most once, in awk's for-in order.
#
# The shebang uses "env -S" so that "awk -f" is split into two arguments; on
# Linux a plain "#!/usr/bin/env awk -f" hands "awk -f" to env as one word.

# Runs once before any input is read: use ';' as the field separator.
BEGIN { FS = ";" }

# Runs for every input line.
{
    n = split($1, table, ",")   # names of this line go into table[1..n]

    # Visit every pair (i, j) with i < j. For "Bruno,Hussam,Harsh,Atheendra"
    # that is: [Bruno,Hussam], [Bruno,Harsh], [Bruno,Atheendra],
    # [Hussam,Harsh], [Hussam,Atheendra], [Harsh,Atheendra].
    for (i = 1; i <= n; i++) {
        for (j = i + 1; j <= n; j++) {
            # The keys [a,b] and [b,a] are distinct, so the counter is kept in
            # both directions; both always hold the same value.
            friends[table[i], table[j]] += 1
            friends[table[j], table[i]] += 1
        }
    }
}

# Runs once after all input has been processed.
END {
    for (k in friends) {
        # A two-subscript key is stored as "a SUBSEP b"; recover both names.
        split(k, pair, SUBSEP)

        # 'found' records the orderings already visited. A pair is reported
        # only while its mirrored ordering has not been visited yet, so each
        # unordered pair is considered exactly once.
        found[pair[1], pair[2]] = 1
        if (!((pair[2], pair[1]) in found) && friends[k] >= 3) {
            print pair[1] " " pair[2]
        }
    }
}
problem5.awk
#!/usr/bin/env -S awk -f
#
# For every person, print the number of distinct other people who appear on a
# line together with them.
#
# Input:  ';'-separated records; field 1 is split on ',' into names.
# Output: one line per person, "name count", in awk's for-in order.
#
# The shebang uses "env -S" so that "awk -f" is split into two arguments; on
# Linux a plain "#!/usr/bin/env awk -f" hands "awk -f" to env as one word.

# Runs once before any input is read: use ';' as the field separator.
BEGIN { FS = ";" }

# Runs for every input line: for each person on the line, append the whole
# name list ($1) to that person's accumulated string. The string therefore
# contains repetitions and the person's own name.
{
    n = split($1, table, ",")
    for (i = 1; i <= n; i++) {
        together[table[i]] = together[table[i]] "," $1
    }
}

# Runs once after all input has been processed.
END {
    for (k in together) {
        printf("%s ", k)

        # Collapse the accumulated names into a set of unique keys.
        split(together[k], names, ",")
        for (l in names) {
            unique[names[l]] = 1
        }
        delete unique[k]    # do not count the person themselves
        delete unique[""]   # empty name produced by the leading ','

        # Count the remaining keys with a loop; length(array) is a common
        # extension and not available in every awk.
        d = 0
        for (u in unique) {
            d++
        }
        printf("%d\n", d)

        # Reset the set before handling the next person.
        delete unique
    }
}
problem6.sh
#!/bin/bash
#
# Print two figures computed from restaurants.txt using the sibling awk
# scripts: a line count of problem3.awk's output, and the sum of the second
# column of problem5.awk's output.

# Run problem3.awk and count its output lines to get the number of dyads.
./problem3.awk restaurants.txt | wc -l

# Run problem5.awk and sum the values in the second column (the degree of each
# person). "sum + 0" forces a numeric result, so empty input prints 0 instead
# of a blank line.
./problem5.awk restaurants.txt | awk '{ sum += $2 } END { print sum + 0 }'
problem7.awk
#!/usr/bin/env -S awk -f
#
# List the payer of every party that mentions Beula, then report how many of
# those parties were paid by Beula.
#
# Input:  ';'-separated records; field 1 is split on ',' into names.
# Output: "Beula: <space-separated payers>" followed by
#         "Beula paid X/Y times".
#
# The shebang uses "env -S" so that "awk -f" is split into two arguments; on
# Linux a plain "#!/usr/bin/env awk -f" hands "awk -f" to env as one word.

# Runs once before any input is read: ';' separates the people from the rest
# of the record.
BEGIN { FS = ";" }

# Runs for every line that contains "Beula".
# NOTE(review): the pattern is matched against the whole record, not only the
# names in field 1 - confirm that no other field can contain "Beula".
/Beula/ {
    n = split($1, table, ",")   # names of this party go into table[1..n]

    # Append this party's payer to the space-separated list of payers.
    # NOTE(review): the payer is taken from table[n-1], i.e. the second-to-last
    # name; confirm against the data format that this is not meant to be
    # table[n].
    beula = beula " " table[n-1]
}

# Runs once after all input has been processed.
END {
    printf("Beula: %s\n", beula)

    # Split the payer list back into individual names; n is the number of
    # payers collected.
    # NOTE(review): this assumes names contain no spaces - confirm.
    n = split(beula, payers, " ")

    # Tally how many times each payer paid (key: name, value: count).
    for (i = 1; i <= n; i++) {
        pay[payers[i]] += 1
    }

    printf("Beula paid %d/%d times\n", pay["Beula"], n)
}