#!/bin/bash
# Download movie trailers from Apple - downloads a single trailer per movie in Apple's trailer XML feed
# specify whether or not to get HD trailers - download priority is 1080p > 720p > 480p > standard 640 wide
# --- Download options --------------------------------------------------------
# Which HD variants to try (1 = try, 0 = skip).  The main loop tries them in
# the order 1080p > 720p > 480p and always falls back to the standard
# 640-wide file referenced by the feed.
GET1080p=0
GET720p=0
GET480p=1
# 1 = also fetch each movie's poster image
GETPOSTER=1
# whitespace-separated list of feeds; the main loop runs once per entry
FEEDS="http://www.apple.com/trailers/home/xml/current.xml"
#for local testing specify a file instead of hitting the net for the feed
#FEEDS="./Apple640Trailers.xml"

# --- External programs -------------------------------------------------------
XMLSTARLET='xml'
AWK='gawk'

# --- Output locations --------------------------------------------------------
# hard-coded file extension for saved videos
# ideally we'd preserve the extension of the original movie file and only add
# the ".trailer" before it (in case there's anything other than .mov)
BEXTENSION=".trailer.mov"
# save location for the individual trailer folders (must end with a slash)
SAVEPATH="v:/Movies/zzztrailertest/"
# directory holding the tracker file below (must end with a slash)
DLDBPATH="./"
# how many of the most recent tracker entries survive each run
DLDB_MAXLINES=5000

#######################################
# Keep only the newest DLDB_MAXLINES entries of the tracker file that records
# completed downloads, so it cannot grow without bound.
# Globals:   DLDBPATH (read), DLDB_MAXLINES (read)
# Arguments: none
# Returns:   0 on success, non-zero if the tracker could not be rewritten
#######################################
trim_download_db() {
  local db="${DLDBPATH}.downloaded.db"
  local tmp="${db}.tmp"

  # First run: start from an empty tracker so tail and later greps have a
  # file to read.
  if [[ ! -e "$db" ]]; then
    : > "$db" || return
  fi

  # Replace the tracker only when tail succeeded; otherwise a read error
  # would overwrite it with an empty file and every trailer would be fetched
  # again.
  if tail -n "$DLDB_MAXLINES" -- "$db" > "$tmp"; then
    mv -- "$tmp" "$db"
  else
    rm -f -- "$tmp"
    return 1
  fi
}

# text file to keep track of completed downloads to prevent getting the same
# trailer the next time the script runs
trim_download_db
# Awk program that turns a movie title (one per input line) into a name that is
# safe to use as a Windows file or folder name.  It decodes the HTML entities
# that can survive in the feed text, converts typographic and double quotes to
# a plain apostrophe, swaps characters Windows forbids for look-alikes, and
# trims surplus whitespace.
# NOTE: the program is wrapped in shell single quotes, so it must not contain
# an apostrophe anywhere (comments included); the awk octal escape \047 is
# used wherever an apostrophe is needed.
FILECLEANER_AWK='
{
  ## decode HTML entities
  gsub(/&gt;/, ">")
  gsub(/&lt;/, "<")
  gsub(/&quot;/, "\"")
  gsub(/&ldquo;|&rdquo;|&bdquo;/, "\"")
  gsub(/&lsquo;|&rsquo;/, "\047")
  gsub(/&sbquo;/, ",")
  ## ampersand entity goes last so that "&amp;gt;" is not decoded twice;
  ## "\\&" is a literal ampersand (a bare & would mean "the matched text")
  gsub(/&amp;/, "\\&")

  ## replace fancy "smart" quotes with straight equivalents
  gsub(/‘|’/, "\047")
  gsub(/“|”|„/, "\"")
  gsub(/‚/, ",")

  ## backquote and double quote to apostrophe
  gsub(/`|"/, "\047")

  ## select illegal filename characters replaced by alternates
  gsub(">", ")")
  gsub("<", "(")
  gsub("[:]", " - ")
  gsub("[/]", "-")
  ## backslash to dash
  gsub("\\\\", "-")
  gsub("[?]", "")
  gsub("[|]", "-")
  gsub("[*]", "+")

  ## collapse runs of spaces (earlier substitutions may have created some)
  gsub(/ +/, " ")

  ## strip leading and trailing blanks
  sub(/^[[:blank:]]+/, "")
  sub(/[[:blank:]]+$/, "")

  ## dump it to stdout
  print
}
'
# ---------------------------------------------------------------------------
# Helpers for the main loop
# ---------------------------------------------------------------------------

# Print field number $1 of the tab-separated record $2.  awk keeps empty
# fields in place, so a missing value does not shift the later columns.
record_field() {
  printf '%s\n' "$2" | "$AWK" -F'\t' -v n="$1" '{ print $n }' 2>/dev/null
}

# Succeed when the tracker file already lists key $1 (e.g. "1234.PREVIEW").
# Fixed-string match, so the "." in the key is not treated as a wildcard.
is_downloaded() {
  grep -qF -- "###$1" "${DLDBPATH}.downloaded.db"
}

# Record key $1 with saved file name $2 in the tracker file.
mark_downloaded() {
  printf '###%s %s\n' "$1" "$2" >> "${DLDBPATH}.downloaded.db"
}

# ---------------------------------------------------------------------------
# main loop - passes once per feed specified above
# ---------------------------------------------------------------------------
for FEEDURL in $FEEDS; do
  # set of partial movie metadata - only the fields we need for downloading,
  # saving & tracking the video/image files.  One tab-separated record per
  # line: id, title, postdate, preview URL, poster URL.
  feed_rows=$("$XMLSTARLET" sel --net -E utf-8 -D -T -t -m "/records/movieinfo" \
    -v "@id" -o $'\t' \
    -v "info/title" -o $'\t' \
    -v "info/postdate" -o $'\t' \
    -v "preview/large" -o $'\t' \
    -v "poster/xlarge" --nl \
    "$FEEDURL" 2>/dev/null)
  TRAILERS=()
  while IFS= read -r feed_row; do
    [[ -n "$feed_row" ]] && TRAILERS+=("$feed_row")
  done <<< "$feed_rows"

  # complete set of movie metadata to be saved out one file per video later.
  # Entry N is paired with TRAILERS[N] by position, which assumes the feed
  # keeps each <movieinfo> element on a single line - TODO confirm.
  feed_rows=$("$XMLSTARLET" sel --net -E utf-8 -t -m "/records/movieinfo" \
    -c "." \
    --nl \
    "$FEEDURL" 2>/dev/null)
  movieFields=()
  while IFS= read -r feed_row; do
    [[ -n "$feed_row" ]] && movieFields+=("$feed_row")
  done <<< "$feed_rows"

  recordDATE=$("$XMLSTARLET" sel --net -D -T -t -m "/records" \
    -v "@date" \
    "$FEEDURL" 2>/dev/null)

  # individual feed loop - passes once per movie in feed
  count=-1
  for MOVIE in "${TRAILERS[@]}"; do
    count=$((count + 1))

    MOVIEID=$(record_field 1 "$MOVIE")
    MOVIETITLE=$(record_field 2 "$MOVIE")
    POSTDATE=$(record_field 3 "$MOVIE")
    # web path to the video file referenced in the feed xml
    PREVIEW=$(record_field 4 "$MOVIE")
    # web path to the poster file
    POSTER=$(record_field 5 "$MOVIE")

    # title made safe for use as a folder/file name
    MOVIETITLEFILE=$(printf '%s\n' "$MOVIETITLE" | "$AWK" "$FILECLEANER_AWK")
    # never save straight into SAVEPATH: if cleaning yields nothing, fall
    # back to a name built from the feed id
    if [[ -z "$MOVIETITLEFILE" ]]; then
      MOVIETITLEFILE="trailer-${MOVIEID}"
    fi

    # filename substitutions to allow getting HD versions of the referenced file
    # HARD CODED - need logic if referenced names have extensions other than "h640w.mov"
    PREVIEW1080p="${PREVIEW%h640w.mov}h1080p.mov"
    PREVIEW720p="${PREVIEW%h640w.mov}a720p.mov"
    PREVIEW480p="${PREVIEW%h640w.mov}h480p.mov"

    # new local filename to save poster file
    NEWPOSTERNAME="folder.jpg"
    MOVIESAVEPATH="${SAVEPATH}${MOVIETITLEFILE}"

    # create a folder for the downloaded files (using the movie's cleaned
    # name), but only while the trailer is still outstanding
    preview_pending=1
    if is_downloaded "${MOVIEID}.PREVIEW"; then
      preview_pending=0
    fi
    if [[ "$preview_pending" -eq 1 ]]; then
      mkdir -p -- "$MOVIESAVEPATH"
    fi

    # save the trailer's XML data to its own file within the trailer's folder,
    # reformatted to be human-readable.  printf '%s' is used so backslashes in
    # the feed text are written verbatim.
    if [[ -d "$MOVIESAVEPATH" ]]; then
      printf '<?xml version="1.0" encoding="utf-8"?>\n<records date="%s">%s</records>\n' \
        "$recordDATE" "${movieFields[$count]}" > "$MOVIESAVEPATH/temp.xml"
      "$XMLSTARLET" format "$MOVIESAVEPATH/temp.xml" > "$MOVIESAVEPATH/description.xml"
      rm -f -- "$MOVIESAVEPATH/temp.xml"
    fi

    if [[ "$preview_pending" -eq 1 ]]; then
      # candidate downloads in priority order: enabled HD sizes first, then
      # the standard (640x...) file exactly as referenced in the XML feed
      trailer_urls=()
      trailer_names=()
      trailer_notes=()
      if [[ "$GET1080p" -eq 1 ]]; then
        trailer_urls+=("$PREVIEW1080p")
        trailer_names+=("${MOVIETITLEFILE} [1080p]${BEXTENSION}")
        trailer_notes+=("TRYING NEXT LOWER SIZE")
      fi
      if [[ "$GET720p" -eq 1 ]]; then
        trailer_urls+=("$PREVIEW720p")
        trailer_names+=("${MOVIETITLEFILE} [720p]${BEXTENSION}")
        trailer_notes+=("TRYING NEXT LOWER SIZE")
      fi
      if [[ "$GET480p" -eq 1 ]]; then
        trailer_urls+=("$PREVIEW480p")
        trailer_names+=("${MOVIETITLEFILE} [480p]${BEXTENSION}")
        trailer_notes+=("TRYING STANDARD SIZE")
      fi
      trailer_urls+=("$PREVIEW")
      trailer_names+=("${MOVIETITLEFILE}${BEXTENSION}")
      trailer_notes+=("RETRY NEXT RUN")

      # take the first candidate that downloads successfully
      for i in "${!trailer_urls[@]}"; do
        NEWPREVIEWNAME="${trailer_names[$i]}"
        target="${MOVIESAVEPATH}/${NEWPREVIEWNAME}"
        if wget -c -O "$target" "${trailer_urls[$i]}"; then
          mark_downloaded "${MOVIEID}.PREVIEW" "$NEWPREVIEWNAME"
          break
        fi
        echo "##### ID:$MOVIEID URL:${trailer_urls[$i]} FAILED -- ${trailer_notes[$i]}"
        # wget -O creates the output file before it knows whether the request
        # will succeed; drop it if nothing was written (partial data is kept
        # so that -c can resume it on a later run)
        if [[ ! -s "$target" ]]; then
          rm -f -- "$target"
        fi
      done
    else
      echo "##### Trailer ID:$MOVIEID NAME:$MOVIETITLE MARKED DONE -- SKIPPING"
    fi

    # get and save the movie poster image
    if [[ "$GETPOSTER" -eq 1 ]]; then
      if is_downloaded "${MOVIEID}.POSTER"; then
        echo "##### Poster ID:$MOVIEID NAME:$MOVIETITLE MARKED DONE -- SKIPPING"
      elif wget -c -O "$MOVIESAVEPATH/$NEWPOSTERNAME" "$POSTER"; then
        mark_downloaded "${MOVIEID}.POSTER" "$NEWPOSTERNAME"
      else
        echo "##### ID:$MOVIEID URL:$POSTER FAILED -- RETRY NEXT RUN"
      fi
    fi
  done
done