Top 10 Instagram hashtags
Get the top 10 hashtags from Instagram by crawling top-hashtags dot com. Afterwards, download the first three pictures found under each hashtag.
This is the script:
import os
import sys
import time
import urllib

from bs4 import BeautifulSoup
from selenium import webdriver

try:  # Python 3 location of the URL helpers
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2 fallback (what this script was originally written for)
    from urllib import urlopen, urlretrieve

# Top Instagram Hashtags
#
# @bitBulla www.ibulla.com
# marco.spitzbarth@zhdk.ch
#
# Top 10 hashtags from top-hashtags.com/instagram
# Downloads images (`soviel` = how many) into the folder "topHash";
# the folder is created automatically if it does not exist yet.
# Images are stored in that folder with a running number.
#
# Credits: github.com/fleshgordo/webscrapin

# Folder that receives the downloaded images.
OUTPUT_DIR = "topHash"


def _fetch_page_source(url):
    """Load `url` in a fresh Firefox instance and return the page source.

    The browser is always shut down again, even when loading fails, so
    repeated calls do not leave Firefox processes behind.
    """
    browser = webdriver.Firefox()
    try:
        browser.get(url)
        return browser.page_source
    finally:
        browser.quit()


def instaBildDownload(tag, soviel):
    """Download the first `soviel` images found on the Instagram page of `tag`.

    Args:
        tag: Hashtag name without the leading "#"; it is appended to the
            Instagram "explore/tags" URL and used as the file name prefix.
        soviel: Maximum number of images to download.

    The images are written to "topHash/<tag>_<n>.jpg", where <n> counts
    from 1. `<img>` elements without a `src` attribute are skipped instead
    of aborting the run. Nothing is returned; a status line with the number
    of images actually saved is printed.
    """
    url_bild = "https://www.instagram.com/explore/tags/" + tag
    soup = BeautifulSoup(_fetch_page_source(url_bild), 'html.parser')

    # Only <img> tags that actually carry a source URL can be downloaded.
    images = [image for image in soup.find_all("img") if image.get("src")]

    # urlretrieve does not create missing directories.
    if not os.path.isdir(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    gespeichert = 0
    for zaehler, image in enumerate(images[:soviel], 1):
        ziel = os.path.join(OUTPUT_DIR, tag + "_" + str(zaehler) + ".jpg")
        urlretrieve(image["src"], ziel)
        gespeichert = zaehler

    # Report what was really saved, which may be fewer than `soviel`.
    print(str(gespeichert) + " Bilder zum Hashtag: #" + tag + " --> gespeichert")


if __name__ == "__main__":
    url = "https://top-hashtags.com/instagram/"
    source = _fetch_page_source(url)
    topSoup = BeautifulSoup(source, 'html.parser')

    # Each matching <div> holds the link(s) whose text is a hashtag.
    tags = topSoup.find_all("div", {"class": "tht-tag small-7 medium-9 columns"})
    top_hash = []
    for tag in tags:
        top_hash.extend(a.text for a in tag.find_all("a"))

    top10 = top_hash[:10]
    print("+++ TOP 10 INSTA HASHES +++")
    for topHash in top10:
        # Drop the first character of the link text (presumably the
        # leading "#" -- verify against the site's markup).
        instaBildDownload(topHash[1:], 3)

    sys.exit()

# NOTE(review): the commented-out code below appears to be an earlier variant
# that wrote a Markdown file for a pandoc PDF export; kept for reference.
#
#pandoc instaselfie.md --latex-engine=xelatex -o instaselfie.pdf
#
#zaehler = 0
#datei = open("top10.md","w")
#for image in images:
#    zaehler = zaehler + 1
#    print (zaehler)
#    datei.write(image.attrs["alt"].encode("utf-8"))
#    datei.write("\n")
#    datei.write("![Image caption](tmp/top10/top" + str(zaehler) + ".jpg)")
#    linkZumBild = image.attrs["src"]
#    urllib.urlretrieve(linkZumBild,"tmp/top10/top" + str(zaehler) + ".jpg")
#    datei.write("\n")
#    datei.write(r"\newpage")
#    datei.write("\n")
#    time.sleep(0.2)
#datei.close()