Top 10 Instagram hashtags

Get the top 10 hashtags from Instagram by crawling top-hashtags.com. Afterwards, download the first three pictures found under each hashtag.

This is the script:

from bs4 import BeautifulSoup
from selenium import webdriver
from urllib import urlopen
import time,sys
import urllib
 
#Top Instagram Hashtags
#
#@bitBulla www.ibulla.com
#marco.spitzbarth@zhdk.ch
#
#Top 10 hashtags from top-hashtags.com/instagram
#Downloads images (as many as "soviel") into a folder that must be created beforehand (topHash)
#Images are saved in that folder with numbered file names
#
#Credits: github.com/fleshgordo/webscrapin
 
def instaBildDownload(tag,soviel):
    """Download the first ``soviel`` images found on an Instagram tag page.

    Opens https://www.instagram.com/explore/tags/<tag> in a Firefox
    WebDriver session, parses the rendered page source and saves the
    images as ``topHash/<tag>_<n>.jpg`` with ``n`` counting up from 1.

    Args:
        tag: Hashtag name without the leading ``#``.
        soviel: Maximum number of images to download.

    Returns:
        None. A summary line with the number of saved images is printed.

    The ``topHash`` folder is not created here; it has to exist already.
    """
    url_bild = "https://www.instagram.com/explore/tags/" + tag
    bild_holen = webdriver.Firefox()
    try:
        bild_holen.get(url_bild)
        bild_source = bild_holen.page_source
    finally:
        # One browser is started per hashtag; quit it even when loading
        # fails so that no Firefox processes pile up.
        bild_holen.quit()

    soup = BeautifulSoup(bild_source, 'html.parser')
    # src=True keeps only <img> tags that actually carry a src attribute,
    # so an image without one is skipped instead of raising KeyError.
    bilder = soup.find_all("img", src=True)[:soviel]
    for zaehler, image in enumerate(bilder, 1):
        ziel = "topHash/" + str(tag) + "_" + str(zaehler) + ".jpg"
        # NOTE(review): urllib.urlretrieve is the Python 2 location of this
        # function; under Python 3 it lives in urllib.request.
        urllib.urlretrieve(image["src"], ziel)
    # Report what was really saved; the page may offer fewer than soviel.
    print (str(len(bilder))+" Bilder zum Hashtag: #"+str(tag)+" --> gespeichert")
 
# Script body: read the hashtag ranking from top-hashtags.com, then download
# three images for each of the first ten hashtags via instaBildDownload.
url="https://top-hashtags.com/instagram/"
crawler = webdriver.Firefox()
try:
    crawler.get(url)
    source = crawler.page_source
finally:
    # The page source is all that is needed from this browser; quit it
    # before the per-hashtag browsers are started, and also on failure.
    crawler.quit()
topSoup = BeautifulSoup(source,'html.parser')
tags = topSoup.find_all("div", { "class" : "tht-tag small-7 medium-9 columns" })
top_hash = []
for tag in tags:
    # Every link text inside a matching div is collected as a hashtag.
    top_hash.extend(a.text for a in tag.find_all("a"))
top10 = top_hash[:10]
print ("+++ TOP 10 INSTA HASHES +++")
for topHash in top10:
    # Presumably each link text looks like "#name"; remove surrounding
    # whitespace and the leading '#' without cutting into a name that has
    # no '#' prefix.
    tag_name = topHash.strip().lstrip("#")
    if not tag_name:
        continue  # nothing usable to build a tag URL from
    instaBildDownload(tag_name,3)

sys.exit()
 
#
#pandoc instaselfie.md --latex-engine=xelatex -o instaselfie.pdf
#
#zaehler = 0
#datei = open("top10.md","w")
#for image in images:
#    zaehler = zaehler + 1
#    print (zaehler)
#    datei.write(image.attrs["alt"].encode("utf-8"))
#    datei.write("\n")
#    datei.write("![Image caption](tmp/top10/top" + str(zaehler) + ".jpg)")
#    linkZumBild = image.attrs["src"]
#    urllib.urlretrieve(linkZumBild,"tmp/top10/top" + str(zaehler) + ".jpg")
#    datei.write("\n")
#    datei.write(r"\newpage")
#    datei.write("\n")
#    time.sleep(0.2)
#datei.close()
Warning: Trying to access array offset on value of type bool in /home/httpd/vhosts/ibulla.com/httpdocs/wiki/lib/tpl/bootstrap3/Template.php on line 588
  • art/py_topinsta.txt
  • Zuletzt geändert: 2018/04/24 12:08
  • von 127.0.0.1