From fcf073feeebe0d90a21f35765abd414f0da4c3ea Mon Sep 17 00:00:00 2001
From: Waqas Ahmed
Date: Thu, 27 Aug 2015 11:28:20 +0500
Subject: [PATCH] specify user-agent for sites that return 403

---
 opengraph/opengraph.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/opengraph/opengraph.py b/opengraph/opengraph.py
index 9cedb97..50e8834 100644
--- a/opengraph/opengraph.py
+++ b/opengraph/opengraph.py
@@ -47,7 +47,8 @@ def __getattr__(self, name):
     def fetch(self, url):
         """
         """
-        raw = urllib2.urlopen(url)
+        req = urllib2.Request(url, headers={'User-Agent' : "Mozilla"})
+        raw = urllib2.urlopen(req, timeout=15)
         html = raw.read()
         return self.parser(html)
 
@@ -123,4 +124,4 @@ def scrape_url(self, doc):
     def scrape_description(self, doc):
         tag = doc.html.head.findAll('meta', attrs={"name":"description"})
         result = "".join([t['content'] for t in tag])
-        return result
\ No newline at end of file
+        return result