#!/usr/bin/awk -f
# Create an atom feed from a list of plain text files.
#   First line of a post is its title (a leading "#" is removed).
#   Second line is its date, formatted %Y-%m-%dT%TZ.
#   Every following line is the content of the post.
#
# The only argument is the directory holding the posts.
# Variables that can be set with -v (default in parentheses):
#   protocol   (http)
#   domain     (domain.tld)
#   feedurl    (protocol://domain/<directory>/atom.xml)
#   author     (john_doe)
#   email      (author@domain)
#   feedtitle  (my amazing blog)
#   cmd        command printing one post path per line
#              (find <directory> ... | sort -r)
#   xsl        stylesheet location; no stylesheet line is written when unset
#   max        maximum number of entries; unset or 0 means no limit
#
# CHANGELOG :
# 2022-05-19 : switch back to using variables -v but set defaults. add examples.
# 2023-07-08 : remove useless empty lines at top of content, remove leading "#" in titles

BEGIN {
    # check we have ARGV[1]
    if (ARGC != 2) { usage() }
    dir = ARGV[1]

    # if the user didn't specify variables, use defaults
    if (protocol == "")  { protocol = "http" }
    if (domain == "")    { domain = "domain.tld" }
    if (feedurl == "")   { feedurl = protocol "://" domain "/" dir "/atom.xml" }
    if (author == "")    { author = "john_doe" }
    if (email == "")     { email = author "@" domain }
    if (feedtitle == "") { feedtitle = "my amazing blog" }
    # command to list all posts, newest first thanks to the reverse sort
    if (cmd == "") {
        cmd = "find " dir " -name '*.txt' -maxdepth 1 -a ! -name '_*' | sort -r"
    }

    # header
    # NOTE(review): element and attribute names follow the Atom format
    # (RFC 4287); compare with a previously published feed to confirm.
    print "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
    if (xsl != "") {
        printf "<?xml-stylesheet href=\"%s\" type=\"text/xsl\"?>\n", xsl
    }
    print "<feed xmlns=\"http://www.w3.org/2005/Atom\">"
    printf "<id>%s://%s/%s/</id>\n", protocol, domain, dir
    printf "<title>%s</title>\n", feedtitle
    printf "<link rel=\"self\" href=\"%s\" />\n", feedurl
    printf "<author>\n<name>%s</name>\n<email>%s</email>\n</author>\n", author, email

    # the feed is stamped with the time of generation
    feed_updated = strftime("%Y-%m-%dT%TZ", systime())
    printf "<updated>%s</updated>\n", feed_updated

    # let's go : one <entry> per path printed by cmd
    n = 0
    while ((cmd | getline post) == 1) {
        # stop once max entries have been written
        if ((max > 0) && (n >= max)) { break }

        # get first line : title
        if ((getline title < post) != 1) { close(post); continue }
        # remove leading "#" (and the spaces after it) if any
        sub(/^# */, "", title)

        # get second line : pubdate
        if ((getline pubdate < post) != 1) { close(post); continue }

        link = "/" post

        print "<entry>"
        printf "<title>%s</title>\n", title
        printf "<id>%s://%s%s</id>\n", protocol, domain, link
        printf "<updated>%s</updated>\n", pubdate
        printf "<link rel=\"alternate\" href=\"%s://%s%s\" />\n", protocol, domain, link
        printf "<content type=\"text\">"
        printf "<![CDATA["

        # the rest of the file is the content
        in_content = 0
        while ((getline line < post) == 1) {
            # skip empty lines found before the first line of content
            if (!in_content && line ~ /^[ \t]*$/) { continue }
            in_content = 1
            # "]]>" would close the CDATA section too early,
            # so it is split across two sections
            gsub("]]>", "]]]]><![CDATA[>", line)
            print line
        }
        close(post)

        print "]]>"
        print "</content>"
        print "</entry>"
        n++
    }
    close(cmd)

    # The closing tag is written here rather than in an END rule so that
    # usage() can exit without printing it, and so that awk never tries to
    # read the directory argument as an input file.
    print "</feed>"
    exit 0
}

# Print an example invocation on stderr and stop with exit status 1.
function usage() {
    printf "usage : awk -f atom.awk -v protocol=\"https\" -v domain=\"domain.tld\" -v author=\"batman\" -v feedtitle=\"gotham's log\" -v xsl=\"/path/to/file/on/server.xsl\" posts/\n" > "/dev/stderr"
    exit 1
}