Forum Import Tool

This tool is still in development and subject to change.

Installing the software

#!/bin/bash
# ubforums2ubwiki.sh
# Version 0.5
# Copyleft 2012 bodhi.zazen
# Assistance from bobweaver, cortman, forestpiskie, nothingspecial, and wildmanne39
# This program is distributed free under terms of the GNU General Public License (v. 3.0)
# Version 0.5 added sed commands to remove HTML formatted special characters. cortman

# script converts Ubuntu forums (VBulletin syntax) posts to
# Ubuntu wiki (MoinMoin markup).

# Designed to facilitate transcribing Tutorials and documentation
# from Ubuntu forums to Ubuntu wiki.
# Pages for import are listed at
# https://help.ubuntu.com/community/Pages%20For%20Import

# NOTE: the script uses bash-only syntax ([[ ... ]]), so it must run under
# bash; the shebang on the first line guarantees that when it is executed
# directly or from a desktop launcher.

# Sanity checks
# The tools are looked up on PATH (the same way they are invoked later)
# instead of at a fixed /usr/bin location. If a tool is missing, try to
# install it; abort when that fails because nothing below can work without it.

# need to check for html2wiki
command -v html2wiki >/dev/null 2>&1 \
  || sudo apt-get -y install libhtml-wikiconverter-moinmoin-perl \
  || { echo "html2wiki is missing and could not be installed" >&2; exit 1; }
# need to check for zenity
command -v zenity >/dev/null 2>&1 \
  || sudo apt-get -y install zenity \
  || { echo "zenity is missing and could not be installed" >&2; exit 1; }

# Variables
# Working directory that receives the downloaded, cleaned and converted files
DIR="${HOME}/Documents/wiki"
# Create the working directory if it does not exist yet; stop if that fails
[[ -d "${DIR}" ]] || mkdir -p "${DIR}" \
  || { echo "Cannot create ${DIR}" >&2; exit 1; }
# Tool paths are resolved with the shell builtin `command -v` rather than the
# external `which`.
AWK=$(command -v awk)
# egrep is obsolescent (current GNU grep prints a warning for it), so use
# "grep -E". Like CONVERT below, this variable holds a command plus an
# option and is therefore expanded unquoted where it is used.
GREP="$(command -v grep) -E"
SED=$(command -v sed)
CONVERT='html2wiki --dialect MoinMoin'
WGET=$(command -v wget)
ZENITY=$(command -v zenity)
NAME=''
URL=''
# Pick the first available editor; fall back to asking the user for one.
if [[ -x /usr/bin/gedit ]]; then
  EDIT='/usr/bin/gedit'
elif [[ -x /usr/bin/kate ]]; then
  EDIT='/usr/bin/kate'
elif [[ -x /usr/bin/leafpad ]]; then
  EDIT='/usr/bin/leafpad'
else
  EDIT=$($ZENITY --title="Please choose an editor" --file-selection)
  return_value=$?
  # Non-zero means the dialog was cancelled or could not be shown
  if [[ "$return_value" -ne 0 ]]; then exit 1; fi
fi
CAT='/bin/cat'

# What post to transcribe
# (the page is later downloaded into the working directory, $HOME/Documents/wiki)

URL=$($ZENITY --entry --title="Tutorial to transcribe" --text="Paste the tutorial URL from your browser")
return_value=$?
# Non-zero means the dialog was cancelled or could not be shown
if [ "$return_value" -ne "0" ]; then exit 1; fi
# Pressing OK on an empty entry returns success with no text; nothing to download
if [[ -z "${URL}" ]]; then echo "No URL entered" >&2; exit 1; fi

# What working name to give our pages

NAME=$($ZENITY --entry --title="Title?" --text="Please enter your title")
return_value=$?
if [ "$return_value" -ne "0" ]; then exit 1; fi
# The title becomes part of the file names below: an empty title would
# produce ".html"/".wiki", and a "/" would point outside the working directory.
if [[ -z "${NAME}" ]]; then echo "No title entered" >&2; exit 1; fi
if [[ "${NAME}" == */* ]]; then echo "The title must not contain '/'" >&2; exit 1; fi
FILE="${DIR}/${NAME}.html"          # raw download
CLEAN="${DIR}/${NAME}.clean.html"   # first post only, cleaned
WIKI="${DIR}/${NAME}.wiki"          # converted wiki text

# Download
# Fetch the page at ${URL} into ${FILE}. The "--" ends option parsing so a
# pasted value that starts with "-" is still treated as the URL.

$WGET -O "${FILE}" -- "${URL}"
return_value=$?
if [ "$return_value" -ne "0" ]; then
  # Red message on stderr; the trailing escape resets the terminal colour
  printf '\033[0;31m %s\033[0m\n' "Download Failed" >&2
  exit 1
fi

# Clean up the raw html and then convert the clean html

# awk prints the lines from the first "-- message --" marker up to (but not
# including) the following "-- / message --" marker, then stops reading.
# GREP -v '(Code|Quote):' removes the "Code:" and "Quote:" divisions
# ($GREP is intentionally unquoted so a value carrying options still word-splits.)
$AWK '/-- message --/ {n++}; /-- \/ message --/ {n++} ;{if (n == 1) {print}}; {if ( n > 1 ) {exit}}' "${FILE}" | $GREP -v '(Code|Quote):' > "${CLEAN}"
# $? would only report the grep stage; PIPESTATUS exposes the awk status as
# well. It must be copied immediately, before any other command overwrites it.
pipe_status=("${PIPESTATUS[@]}")
# grep -v exits non-zero when it selected no lines, i.e. nothing was extracted
if [[ "${pipe_status[0]}" -ne 0 || "${pipe_status[1]}" -ne 0 ]]; then
  # Red message on stderr; the trailing escape resets the terminal colour
  printf '\033[0;31m %s\033[0m\n' "Cleanup Failed" >&2
  exit 1
fi

# Start a fresh ${WIKI} file with the page header: the three
# "## <<Include(Tag/...)>>" lines followed by a right-floated table of contents.
# The delimiter is quoted because the header text is meant to be written literally.
$CAT > "${WIKI}" <<'EOF'
## <<Include(Tag/ContentCleanup)>>
## <<Include(Tag/StyleCleanup)>>
## <<Include(Tag/NeedsExpansion)>>
||<tablestyle="float:right; font-size: 0.9em; width:40%; background:#F1F1ED; margin: 0 0 1em 1em;" style="padding:0.5em;"><<TableOfContents>>||

EOF


# Convert
# Run the cleaned HTML through html2wiki and append the result to ${WIKI}.
# The first sed turns every [[BR]] marker into a new line.
# The second sed (http[s]: ...) formats wiki links: a bracketed
# "[http://... label]" becomes "[[http://...| label]]".

$CONVERT "${CLEAN}" |  $SED -e 's|\[\[BR\]\]|\n|g' | $SED -e 's/\(\[http[s]*:\S*\)\(.*\]\)\(.*\)/\[\1|\2\]\3 /g' >> "${WIKI}"
# $? would only report the last sed, hiding a failure of the converter
# itself; PIPESTATUS holds the exit status of every stage. It must be copied
# immediately, before any other command overwrites it.
pipe_status=("${PIPESTATUS[@]}")
for stage_status in "${pipe_status[@]}"; do
  if [[ "$stage_status" -ne 0 ]]; then
    # Red message on stderr; the trailing escape resets the terminal colour
    printf '\033[0;31m %s\033[0m\n' "Conversion Failed" >&2
    exit 1
  fi
done

# Add CC license information
# http://creativecommons.org/licenses/by-sa/3.0/
# Appends a blank line and an "Originally posted" link back to the forum thread.
# printf with %s writes the URL verbatim; echo -e would also expand any
# backslash sequences contained in the user-supplied URL.
printf '\nOriginally posted [[%s|The Ubuntu Forums (ubuntuforums.org)]]\n' "${URL}" >> "${WIKI}"

# Replace HTML special characters
# Decodes &lt; &gt; and &amp; in ${WIKI} in place, in a single pass.
# &amp; is handled last so that text such as "&amp;lt;" ends up as "&lt;"
# rather than being decoded twice into "<".
# ${WIKI} is quoted so that a title containing spaces still names one file.

$SED -i -e 's/&lt;/</g' -e 's/&gt;/>/g' -e 's/&amp;/\&/g' "${WIKI}"


# Review the document
# Open the generated wiki text in the chosen editor. $EDIT holds a single
# executable path, so it is quoted to survive spaces in that path.
"$EDIT" "${WIKI}"
return_value=$?
if [ "$return_value" -ne "0" ]; then
  # Red message on stderr; the trailing escape resets the terminal colour
  printf '\033[0;31m %s\033[0m\n' "Opening document with your editor failed" >&2
  exit 1
fi

Save this as /usr/local/bin/ubforums2ubwiki.sh

or

~/bin/ubforums2ubwiki.sh

Make it executable

sudo chmod a+x /usr/local/bin/ubforums2ubwiki.sh

Run the tool

ubforums2ubwiki.sh

or create a desktop launcher. An image to be used as an icon can be found at https://launchpadlibrarian.net/101180651/logo64.png

https://launchpadlibrarian.net/101180651/logo64.png

Be aware that the tool only looks at the first post of the thread - any information within the thread will need to be manually added to the wiki once the initial paste has been done.

Running the tool will cause an entry box to show - paste in the URL for the first post of the tutorial you wish to convert. When prompted next, enter a name for the new wiki page - do not use spaces in the name.

url.png

title.png

Once the tool has run - a text editor window will open (the tool checks for the availability of a text editor) with the text, you can copy this for use in the wiki creation.

attachment:gedit.png

You can either create a new page by directly entering its name - if it does not exist, you will be prompted to create the new page.

Please make sure to not have spaces in the name you choose.

Alternatively you can use the Page Creation macro to do so.

Once you have the new page, paste in the text from the .wiki file the tool created (~/Documents/wiki/<name>.wiki, where <name> is the title you entered), check for any further editing and, when happy, save the changes. If you wish, you can preview the page before saving.

attachment:wikidone.png

Other Options


If you would like to just make us aware of threads that could be suitable for conversion to a wiki then please add them to the Import Page

ForumWikiTeam/ImportTool (last edited 2012-09-11 20:31:26 by mail)