#!/usr/bin/python
"""Collect Zemanta keyword and link suggestions for a text file.

Usage: script.py TEXTFILE

The input text is split into whitespace-separated words and sent to the
Zemanta ``zemanta.suggest`` REST method in chunks of CHUNK_WORDS words.
For every chunk, the suggested keyword names and link anchors are written
one per line, with spaces replaced by '~', to three files in the current
directory:

    <input basename>-combined.txt   keywords and link anchors
    <input basename>-tags.txt       keywords only
    <input basename>-links.txt      link anchors only

Runs on Python 2.7 and Python 3.
"""

import contextlib
import io
import os
import sys

try:
    import simplejson
except ImportError:  # simplejson not installed: the stdlib module is API-compatible
    import json as simplejson

try:  # Python 3
    from urllib.parse import urlencode
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlencode, urlopen

APIKEY = '### YOU KEY HERE ###'

GATEWAY = 'http://api.zemanta.com/services/rest/0.0/'

# Number of words sent to the service per request.
CHUNK_WORDS = 340

# Stop issuing requests once more than this many words have been processed
# ("breaking at 30k words for this experiment").
MAX_WORDS = 30000


def get_suggest(text):
    """Request suggestions for *text* and return the decoded JSON response.

    Args:
        text: The text to analyse, as a text string or UTF-8 encoded bytes.

    Returns:
        The parsed JSON document returned by the gateway. ``main`` reads
        ``['keywords']`` and ``['markup']['links']`` from it.

    Raises:
        IOError/URLError: If the HTTP request fails.
        ValueError: If the response body is not valid JSON.
    """
    # urlencode() accepts byte values on both Python 2 and 3, while Python 2
    # cannot percent-encode non-ASCII unicode, so normalise to UTF-8 bytes.
    if not isinstance(text, bytes):
        text = text.encode('utf-8')
    args = {'method': 'zemanta.suggest',
            'api_key': APIKEY,
            'text': text,
            'format': 'json'}
    # The encoded form is pure ASCII; Python 3 requires POST data as bytes.
    payload = urlencode(args).encode('ascii')
    with contextlib.closing(urlopen(GATEWAY, payload)) as response:
        raw = response.read()
    # Decoding first guarantees every JSON string comes back as text.
    return simplejson.loads(raw.decode('utf-8'))


def _tagged_lines(items, key):
    """Return one line per item: ``item[key]`` with spaces replaced by '~'."""
    return [item[key].replace(" ", "~") + "\n" for item in items]


def _process_chunk(words, out_combined, out_tags, out_links):
    """Fetch suggestions for *words* and append them to the output files."""
    suggest = get_suggest(" ".join(words))
    keyword_lines = _tagged_lines(suggest['keywords'], 'name')
    link_lines = _tagged_lines(suggest['markup']['links'], 'anchor')
    # Within a chunk the combined file lists keywords first, then anchors.
    out_combined.writelines(keyword_lines)
    out_tags.writelines(keyword_lines)
    out_combined.writelines(link_lines)
    out_links.writelines(link_lines)


def main():
    """Process the file named by ``sys.argv[1]`` and write the result files.

    Exits with a usage message when no input file is given. Prints the
    running word count after each chunk as a progress indicator.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: %s TEXTFILE" % os.path.basename(sys.argv[0]))

    source_path = sys.argv[1]
    outname = os.path.splitext(os.path.basename(source_path))[0]

    # Undecodable bytes are replaced rather than aborting the whole run.
    with io.open(source_path, encoding='utf-8', errors='replace') as source:
        words = source.read().split()

    # UTF-8 output files accept any suggestion text, so no write has to be
    # silently skipped because of non-ASCII characters.
    with io.open(outname + '-combined.txt', 'w', encoding='utf-8') as out_combined, \
            io.open(outname + '-tags.txt', 'w', encoding='utf-8') as out_tags, \
            io.open(outname + '-links.txt', 'w', encoding='utf-8') as out_links:
        # Slicing also yields the final, shorter chunk, so trailing words are
        # not dropped when the word count is not a multiple of CHUNK_WORDS.
        for start in range(0, len(words), CHUNK_WORDS):
            chunk = words[start:start + CHUNK_WORDS]
            _process_chunk(chunk, out_combined, out_tags, out_links)
            processed = start + len(chunk)
            print(processed)
            # NOTE(review): the original formatting was lost; the cap is
            # assumed to be checked once per chunk - confirm.
            if processed > MAX_WORDS:
                break


if __name__ == '__main__':
    main()