Practicals for Introduction to UNIX, summer semester 2017/18

Petr Kučera, KTIML MFF UK

Sample exam question and a solution

Download the source code


Deprecated: Function create_function() is deprecated in /home/kucerap/public_html/include/geshi.php on line 4698
#!/bin/sh

######################################################################
# Suppose we have a WWW page with "news", i.e. the index page contains
# a list of links to individual message pages stored in the same
# directory. The directory content is managed by email. Write a script
# implementing this workflow. The script reads mails arriving at a
# special address, checks the sender address (the "From:" header value)
# against a list of valid senders stored in the FROM_ADDR shell variable
# (a space-separated list of addresses) and modifies the content of the
# directory according to the mail.
# The index.html page has the following format:
#
# <HTML>
# ... lines not containing <OL>
# <OL>
# <LI><A HREF=3D"message-file-name">page title</A> (dd. mm. yyyy)
# ...
# </OL>
# ... rest of the page
# The mail must contain one of the following:
#
# a) Subject: DEL date [n]
# then the n-th item from the given day is removed from the list in
# the index file (together with the message page itself); if the
# number is omitted, all news from the given day are removed.
# b) Subject: page title
# then the mail body contains a new message page; it should be saved
# under a unique name and added to the index; the page title must
# also be checked for uniqueness.
# The operation result (log) is sent to the original sender. The data
# of every incoming mail (date, time, sender, subject) are logged to the
# access.log file. The files must be locked while they are being
# modified to prevent parallel processing of two incoming mails.
######################################################################

# ASSUMPTION: The script gets no parameters, the input message is
# presented to its standard input (assume it is passed to the script
# in procmail or .forward using pipe).

# --- Configuration ---------------------------------------------------
# Space separated list of senders allowed to manage the news directory.
# In a real deployment this would be set from outside the script; it is
# hard-coded here only to keep the example self contained.
FROM_ADDR='alice@black.org bob@white.com'

# Files the script works with.
INDEX='index.html'            # index page that is rewritten by the script
ACCESS_LOG='access.log'       # one line is appended here per incoming mail
LOCKFILE='/tmp/sample.lock'   # lock file held while the script runs
MSGFILE=/tmp/msg.$$           # per-process file collecting the delete report

# Seconds to wait between two attempts to take the lock.
SLEEP_INTERVAL=1

# Header values of the processed mail; filled in by the header parser.
FROM=
SUBJECT=

# Print a file name for a new message page that is not in use yet.
# Names have the form page-DD-MM-YYYY-N (current date). N is one more
# than the largest number carried by such a name among the files that
# find reports below the current directory, or 1 when there is none.
unique_name() {
   prefix=page-$(date +%d-%m-%C%y)-
   # Keep just the trailing number of every path that consists of four
   # dash separated fields followed by digits only; largest one first.
   max_number=$(find . -name "${prefix}*" -prune |
      sed -n 's/^[^-]*-[^-]*-[^-]*-[^-]*-\([[:digit:]]\{1,\}\)$/\1/p' |
      sort -nr |
      sed 1q)
   # No page for the current date yet: start counting from zero.
   : "${max_number:=0}"
   echo "${prefix}$((max_number + 1))"
}

# Report the result of an operation back to the sender of the mail.
# Arguments: $1 - text of the report
# Globals:   FROM (read), SUBJECT (read)
# Outputs:   two lines on stdout describing the reply that would be sent
#
# printf '%s\n' is used instead of echo because the subject, the sender
# and the report come from the incoming mail: some /bin/sh
# implementations make echo interpret backslash sequences in its
# arguments, which would alter such text.
respond() {
   # The following line is commented so that we can test the script without
   # actually calling an email. It should be uncommented for an actual use.
   # printf '%s\n' "$1" | mail -s "Re: $SUBJECT" "$FROM"
   printf '%s\n' "mail back to $FROM, subject: Re: $SUBJECT"
   printf '%s\n' "msg: $1"
}

# Add a freshly saved message page to the list in the index file.
# Arguments: $1 - file name of the new page (already written)
#            $2 - page title (the mail subject)
#            $3 - sender address, used only in the report
# Globals:   INDEX (read, file is rewritten)
# Outputs:   whatever respond prints
#
# A new <LI> entry dated with the current day is inserted right before the
# first </OL> line that follows an <OL> line. When an existing entry
# already carries the same title, the index is left untouched and the
# page file $1 is removed again. The same "already exists" branch is
# also taken when awk fails or the index has no </OL> after <OL>.
add_new_page() {
   dt="$(date +"%d. %m. %C%y")"
   # The new index is built next to the real one, so the final mv stays
   # in one directory and no predictable name in /tmp is needed. ">|" is
   # used because the script runs with noclobber (set -C).
   new_index="${INDEX}.new.$$"
   if awk -v pgfile="$1" -v title="$2" -v date="$dt" '
      # state: 0 = before <OL>, 1 = inside the list, 2 = copy the rest
      BEGIN {
         state = 0
         unchanged = 1
      }

      state == 2 { print; next }

      /<OL>/ { state = 1; print; next }

      state == 0 { print; next }

      # End of the list reached without a duplicate: insert the entry.
      /<\/OL>/ {
         printf "<LI><A HREF=3D\"%s\">%s</A> (%s)\n", pgfile, title, date
         unchanged = 0
         state = 2
         print
         next
      }

      # Existing entry: cut out its title and compare it with the new one.
      /<LI>/ {
         line = $0
         sub("^ *<LI><A HREF=3D\"[^\"]*\">", "", line)
         sub("</A>[^<]*$", "", line)
         if (line == title)
            state = 2
      }

      # Every line inside the list is kept, whether it is an entry or not.
      { print }

      # Exit status 0 only when the entry has been inserted.
      END { exit unchanged }
   ' "$INDEX" >|"$new_index"
   then
      mv "$new_index" "$INDEX"
      dt="$(date +"%d. %m. %C%y %H:%M:%S")"
      respond "[$dt] Added new page with title $2, file=$1, from $3"
   else
      rm -f "$new_index"
      rm -f -- "$1"
      dt="$(date +"%d. %m. %C%y %H:%M:%S")"
      respond "[$dt] Page with title $2 already exists, file=$1, from $3"
   fi
}

# Check that an address is one of the permitted senders.
# Arguments: $1 - address to look up
# Globals:   FROM_ADDR (read) - space separated list of allowed addresses
# Returns:   0 when $1 equals one of the listed addresses; otherwise a
#            refusal is passed to respond and 1 is returned
check_sender() {
   # FROM_ADDR is expanded unquoted on purpose: it is split into words.
   for allowed in $FROM_ADDR
   do
      [ "$1" != "$allowed" ] || return 0
   done
   respond "Address $1 is not on the list."
   return 1
}

# Remove entries of a given day from the index together with their pages.
# Arguments: $1 - mail subject of the form "DEL dd. mm. yyyy [n]"
#            $2 - sender address (not used by this function)
# Globals:   INDEX (read, file is rewritten), MSGFILE (report scratch file)
# Outputs:   whatever respond prints; one diagnostic line on stderr
#
# With a number n only the n-th entry of that day is removed, without it
# all entries of the day are. For every removed entry the file named in
# its HREF is deleted and a "Removed line ..." message is collected in
# MSGFILE, which is then passed to respond. When nothing matches (or awk
# fails) the index is left untouched and "No lines found." is reported.
delete_page() {
   # Everything after "DEL" and the white space that follows it. Taking
   # the request apart this way works for any amount of white space
   # after DEL.
   spec=$(printf '%s\n' "$1" | sed 's/^DEL[[:space:]]*//')
   if printf '%s\n' "$spec" |
      grep -q '^[[:digit:]]\{2\}\. [[:digit:]]\{2\}\. [[:digit:]]\{4\} [[:digit:]]\{1,\}$'
   then
      n=${spec##* }
      page_date=${spec% *}
   else
      n="ALL"
      page_date=$spec
   fi
   # Diagnostic only; kept away from stdout.
   printf 'n="%s", page_date="%s"\n' "$n" "$page_date" >&2
   # The new index is built next to the real one; ">|" is used because
   # the script runs with noclobber (set -C).
   new_index="${INDEX}.new.$$"
   if awk -v n="$n" -v page_date="$page_date" -v msgfile="$MSGFILE" '
      # Quote a string for the shell (\047 is the single quote), so that
      # the file name from the index reaches rm as exactly one argument.
      function shquote(s,   k, parts, i, out) {
         k = split(s, parts, "\047")
         out = "\047" parts[1]
         for (i = 2; i <= k; i++)
            out = out "\047\\\047\047" parts[i]
         return out "\047"
      }

      # state: 0 = before <OL>, 1 = inside the list, 2 = copy the rest
      BEGIN {
         cnt = 0
         state = 0
         unchanged = 1
      }

      state == 2 { print; next }

      /<OL>/ { state = 1; print; next }

      state == 0 { print; next }

      /<\/OL>/ {
         print
         state = 2
         next
      }

      /<LI>/ {
         # Reduce the entry to the date between the final parentheses.
         line = $0
         sub("^.*</A> [(]", "", line)
         sub("[)][[:space:]]*$", "", line)
         if (line == page_date) {
            ++cnt
            if (n == "ALL" || n == cnt) {
               # The file name is the first double-quoted string.
               split($0, a, "\"")
               system("rm -f -- " shquote(a[2]))
               printf "Removed line %s\n", $0 >>msgfile
               unchanged = 0
               next
            }
         }
      }

      # Every remaining line inside the list is kept.
      { print }

      # Exit status 0 only when at least one entry has been removed.
      END { exit unchanged }
   ' "$INDEX" >|"$new_index"
   then
      mv "$new_index" "$INDEX"
      respond "$(cat "$MSGFILE")"
      rm -f "$MSGFILE"
   else
      rm -f "$new_index"
      respond "No lines found."
   fi
}

################################
# Here starts the main program #
################################

# Set noclobber: from now on ">" fails when the target file already
# exists, which is what makes the lock file below work as a lock.
set -C
# Try locking in a cycle
while ! ( echo "Locked by process $$" > "$LOCKFILE" ) 2>/dev/null
do
   echo "sleep"
   sleep "$SLEEP_INTERVAL"
done
# Install automatic unlock in case of signal and exit. The EXIT trap only
# removes the lock and does not call exit itself, so the exit status of
# the script is preserved. The signal traps turn INT and TERM into a
# regular exit (128 + signal number), which runs the EXIT trap as well.
trap 'rm -f "$LOCKFILE"' EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# Parse message header: the header ends at the first empty line; for
# each of From: and Subject: the last occurrence wins.
while read -r line
do
   case $line in
      "")
         # In this case the header ends and body starts
         break
         ;;
      From:[[:space:]]*)
         FROM=$(printf '%s\n' "$line" | sed 's/^From:[[:space:]]*//')
         ;;
      Subject:[[:space:]]*)
         SUBJECT=$(printf '%s\n' "$line" | sed 's/^Subject:[[:space:]]*//')
         ;;
   esac
done

# Log every incoming mail, whether its sender is accepted or not.
printf '%s From: %s, Subject: %s\n' \
   "$(date +"[%d. %m. %C%y %H:%M:%S]")" "$FROM" "$SUBJECT" >> "$ACCESS_LOG"

check_sender "$FROM" || exit 1

case $SUBJECT in
   DEL[[:space:]]*)
      delete_page "$SUBJECT" "$FROM"
      ;;
   *)
      # The rest of standard input is the mail body, i.e. the new page.
      PAGEFILE=$(unique_name)
      cat >"$PAGEFILE"
      add_new_page "$PAGEFILE" "$SUBJECT" "$FROM"
      ;;
esac