#!/bin/sh
# soulcoud: save an url on archive.org and send a copy by email to read later.
#
# Usage:   soulcoud [-h] url [title ...]
# Env:     SOULCOUD_MAIL - address the page is mailed to (required)
# Author:  prx
# Licence: MIT
# Require: curl, w3m or reader
#
# One may replace the mail command with msmtp or else.
# TODO: improve with wkhtml2pdf or other snapshot tool.

# Print the help text and exit.
# $1 - exit status (default 0). When non-zero the text is written to stderr,
#      so that a misuse is reported as an error and not as regular output.
usage() {
	_status="${1:-0}"
	if [ "${_status}" -ne 0 ]; then
		# We exit right after printing, so redirecting the whole shell is safe.
		exec 1>&2
	fi
	printf "usage:\n\t"
	printf "%s [-h] url [title ...]\n" "$0"
	printf "\t-h : print help\n"
	printf "\nexample:\n"
	printf "\t%s 'http://who-is-clarkkent.net' Superman id?\n" "$0"
	printf "\nThe page is sent to the mail address set in environment variable SOULCOUD_MAIL\n"
	exit "${_status}"
}

# Request the url through web.archive.org and print every href="..." target
# found in the response, one per line.
# $1 - url to archive
# Returns curl's exit status when the request itself fails.
# NOTE(review): the Wayback Machine capture endpoint is usually /save/<url>;
# confirm that requesting /<url> is what is intended here.
archive_url() {
	# Fetch first: in a "curl | sed" pipeline the status would be sed's,
	# which hides any curl failure from the caller.
	_page="$(curl -s "https://web.archive.org/$1")" || return
	printf '%s\n' "${_page}" | sed -n 's/.*href="\([^"]*\).*/\1/p'
}

# Print the page content as text on stdout.
# $1 - url of the page
# Uses w3m; "reader" is a possible alternative (see the commented line).
dump_page() {
	#reader "$1"
	w3m -F -dump -O utf-8 -T text/html "$1"
}

# Parse options first so that -h works even when SOULCOUD_MAIL is not set.
while getopts 'h' c
do
	case "$c" in
		h) usage ;;
		*) usage 2 ;;	# getopts already printed the diagnostic
	esac
done
shift $((OPTIND - 1))

if [ -z "${SOULCOUD_MAIL:-}" ]; then
	{
		printf "You must set the environment variable SOULCOUD_MAIL\n"
		printf "Example: in ~/.profile, enter\n"
		printf "\texport SOULCOUD_MAIL=bruce@wayne.corp\n"
	} >&2
	exit 1
fi

test $# -lt 1 && usage 1

# All arguments are joined with spaces; the text before the first space is
# the url and the remainder is the mail subject. Parameter expansion is used
# instead of printf "$*" so that '%' and '\' in the url are never interpreted
# as printf format directives.
all_args="$*"
url="${all_args%% *}"
case "${all_args}" in
	*' '*) title="${all_args#* }" ;;
	*)     title="${all_args}" ;;
esac
test -z "${title}" && title="${url}"

(
	# page content, rendered as text
	dump_page "${url}"
	printf "\n---\n"
	printf 'url: %s\n' "${url}"

	# then save on archive.org (only for http/https urls)
	case "${url}" in
		http* )
			# Report the archive link only when the request succeeded and
			# a link was actually extracted from the response.
			if archived="$(archive_url "${url}")" && [ -n "${archived}" ]; then
				printf "Saved on: https://web.archive.org/%s\n" "${archived}"
			fi
			;;
	esac
) | mail -s "${title}" "${SOULCOUD_MAIL}"