Info

Full content requires Elite Membership, the below is only a preview of the first post in the discussion.

forexfactory calendar downloader

hi,

Here is my forex factory calendar downloader. It creates a CSV file containing historical events from forexfactory.
It's written in Python and uses lxml, so it's a good starting point for those who have never done web scraping before. The code is quite clean, but it doesn't have any real error handling yet.

Also, it creates a 'raw' CSV view of what is available on the website. It doesn't fill in @NA data and doesn't try to be smart about the data. I intend to add some 'smart' behaviour during the import into the SQL database.

Have fun.

# NOTE: the paste lost the double underscores of ``__future__`` (forum markup
# ate them); ``unicode_literals`` only exists in ``__future__``.
from __future__ import unicode_literals

import codecs
import cookielib
import pprint

import lxml.html
import mechanize

# some utils
pp = pprint.PrettyPrinter()

#########################
# variables
#########################
# presumably the first and last year of events to download -- the code that
# consumes these is not visible here; confirm whether END_YEAR is inclusive.
START_YEAR = 2008
END_YEAR = 2013
# NOTE(review): the URL literal was stripped when this post was extracted
# (the pasted line was an unterminated string, so the module could not even
# be parsed). The empty placeholder keeps the file syntactically valid;
# fill in the real calendar URL before running.
URL = r""
# name of the CSV file to produce
OUTFILE = r"events.csv"
#########################

# our month list for the URL
monthslist = [
    "jan", "feb", "mar", "apr", "may", "jun",
    "jul", "aug", "sep", "oct", "nov", "dec",
]

# sets up the browser: a mechanize Browser with its own cookie jar and a
# desktop Firefox User-agent header.
# NOTE(review): the cookie jar is presumably what lets the timezone choice
# made below persist across later requests -- confirm.
br = mechanize.Browser()
cj = cookielib.LWPCookieJar()
br.set_cookiejar(cj)
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

# set correct timezone
# NOTE(review): the URL passed to br.open() was stripped when this post was
# extracted (the pasted line was an unterminated string). The empty
# placeholder keeps the file parseable; fill in the address of the page
# that hosts the timezone form before running.
br.open("")

# Find the index of the form whose action points at timezone.php and set
# its "timezoneoffset" control to "0". If no form matches, formindex ends
# up one past the last form and select_form() below is left to fail.
formindex = 0
for form in br.forms():
    if "timezone.php" in form.action:
        form["timezoneoffset"] = ["0"]
        break
    formindex += 1

# select the form found above by position and submit it
br.select_form(nr=formindex)
br.submit()

def getData(html, outfile):
"""
Gets data from one page of events
"""
root = lxml.html.fromstring(html)
lines = root.find_class("calendar_row")
curWeekDay = None
curMonthDay = None
for event in lines:
date = event.xpath("td[@class='date']")[0]

#get the day of the month
weekDay = date.xpath("span")
monthDay = date.xpath("span/span")
if len(weekDay) > 0:
curWeekDay = weekDay[0].text
curMonthDay = monthDay[0].text

#get the time
time = event.xpath("td[@class='time']")[0].text if (len(event.xpath("td[@class='time']")) > 0) else ""

#get currency
currency = event.xpath("td[@class='currency']")[0].text if len(event.xpath("td[@class='currency']")) else ""

#get impact
impact = event.xpath("td[@class='impact']/span/@title")[0]\
if len(event.xpath("td[@class='impact']/span/@title")) else ""

#get name of event
nevent = event.xpath("td[@class='event']/span")[0].text if len(event.xpath("td[@class='event']/span")) > 0 else ""

#get actual
actual = event.xpath("td[@class='actual']")[0].text if len(event.xpath("td[@class='actual']")) else ""
#retry if actual is in a span (can happen if they colorize it)
if actual is None or len(actual.strip()) == 0:
actual = event.xpath("td[@class='actual']/span")[0].text if len(event.xpath("td[@class='actual']/span")) else ""
actual = actual.strip().replace("\n", " ") if actual is not None else ""

#get forecast
forecast = event.xpath("td[@class='forecast']")[0].text if len(event.xpath("td[@class='forecast']")) else ""
#retry if forecast is in a span (can happen if they colorize it)
if forecast is None or len(forecast.strip()) == 0:
forecast = event.xpath("td[@class='forecast']/span")[0].text if len(event.xpath("td[@class='forecast']/span")) else ""
forecast …