Singida ndio home
Python:
import os
import getpass
import requests
from bs4 import BeautifulSoup

def createFolder(directory):
    # Create the output folder if it does not exist yet.
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
    except OSError:
        print('Error: Creating directory. ' + directory)

# Save everything into a folder on the current user's Desktop.
username = getpass.getuser()
directory = f"C:\\Users\\{username}\\Desktop\\er40r"
createFolder(directory)
os.chdir(directory)

# Fetch and parse the jokes home page.
url = "http://www.ackyshine.com/vichekesho:_home"
html = requests.get(url)
try:
    html.raise_for_status()
except Exception as exc:
    print('There was a problem: %s' % (exc))
soup = BeautifulSoup(html.text, "html.parser")

def scrape_page(visit, filename):
    # Visit one joke page and write its title and two body paragraphs to a file.
    html = requests.get(visit)
    try:
        html.raise_for_status()
    except Exception as exc:
        print('There was a problem: %s' % (exc))
    soup = BeautifulSoup(html.text, "html.parser")
    header = soup.select('span strong')
    paragraph = soup.select('p')
    with open(filename, "w", encoding="utf-8") as word:
        word.write(f'{header[2].getText()}\n')
        word.write(f'\n{paragraph[5].getText()}\n')
        word.write(f'\n{paragraph[6].getText()}\n')

def mchanganyiko(lists):
    # First column of the page: the "mchanganyiko" (mixed) jokes list.
    items = lists[0].select('.list-pages-item')
    try:
        for i in range(len(items)):
            link = items[i].select('a')
            scrape_page('http://www.ackyshine.com' + link[0].get('href'),
                        f"mchanganyiko{i}.txt")
    except Exception as exc:
        print('Report if error is printed. # mchanganyiko')

def mipya(lists):
    # Second column of the page: the "mipya" (new) jokes list.
    items = lists[1].select('.list-pages-item')
    try:
        for i in range(len(items)):
            link = items[i].select('a')
            scrape_page('http://www.ackyshine.com' + link[0].get('href'),
                        f"mipya{i}.txt")
    except Exception as exc:
        print("Report if error is printed. #mipya")

def specifics(url):
    # Scrape a single joke page given its full URL.
    scrape_page(url, "specifics.txt")

lists = soup.select('.col-sm-6')
specifics("http://www.ackyshine.com/vichekesho:hawa-wanaume-wanaopenda-wanawake-hovyo-barabarani")
mchanganyiko(lists)
mipya(lists)
Thanks, but I think this grabs all the posts; I wanted to scrape from a specific directory, boss.

There is a CodeCanyon script I came across. It has good reviews and it does scrape, but I have not tested it. Some people have nulled it here, so you could give it a try:
Download WP Content Crawler 1.8.0 - Get content from almost any site, automatically! Torrent | 1337x
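If the goal is just one specific listing rather than the whole home page, the same requests/BeautifulSoup pattern from the script above can be pointed at a single category page. A minimal sketch, assuming the category pages use the same .list-pages-item markup as the home page; the "utani" slug below is a hypothetical example:

Python:
import requests
from bs4 import BeautifulSoup

def list_joke_links(listing_url):
    # Fetch one listing page and return the absolute URL of every joke on it.
    html = requests.get(listing_url)
    html.raise_for_status()
    soup = BeautifulSoup(html.text, "html.parser")
    links = soup.select('.list-pages-item a')
    return ['http://www.ackyshine.com' + a.get('href') for a in links]

# Hypothetical category slug -- replace with the directory you actually want.
for page in list_joke_links('http://www.ackyshine.com/vichekesho:utani'):
    print(page)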
I put this together YESTERDAY, but I have noticed it still has bugs and does not scrape all the jokes. I am not sure if this is what you wanted, but it was fun and challenging, so
HAVE FUN WITH IT
Python:
import os
import getpass
import requests
from bs4 import BeautifulSoup

def createFolder(directory):
    # Create the output folder if it does not exist yet.
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
    except OSError:
        print('Error: Creating directory. ' + directory)

username = getpass.getuser()
directory = f"C:\\Users\\{username}\\Desktop\\er40r"
createFolder(directory)
os.chdir(directory)

url = "http://www.ackyshine.com/vichekesho:_home"
html = requests.get(url)
try:
    html.raise_for_status()
except Exception as exc:
    print('There was a problem: %s' % (exc))
soup = BeautifulSoup(html.text, "html.parser")

def scrape_column(column, prefix):
    # Shared loop for one column of the listing page: visit each joke page,
    # pick a title and a body by length heuristics, and write one file per joke.
    items = column.select('.list-pages-item')
    try:
        for i in range(len(items)):
            link = items[i].select('a')
            visit = 'http://www.ackyshine.com' + link[0].get('href')
            html = requests.get(visit)
            try:
                html.raise_for_status()
            except Exception as exc:
                print('There was a problem: %s' % (exc))
            soup = BeautifulSoup(html.text, "html.parser")
            # Title: the first <span><strong> longer than 40 characters,
            # falling back to the first header found.
            header = soup.select('span strong')
            title = ''
            for each in range(min(10, len(header))):
                if len(header[each].text) > 40:
                    title = header[each].getText()
                    break
            if not title and header:
                title = header[0].getText()
            with open(f"{prefix}{i}.txt", "w", encoding="utf-8") as word:
                word.write(f'{title}\n')
            # Body: the first <p> longer than 100 characters, appended under the title.
            paragraph = soup.select('p')
            for num in range(min(10, len(paragraph))):
                if len(paragraph[num].text) > 100:
                    with open(f"{prefix}{i}.txt", "a", encoding="utf-8") as word:
                        word.write(f'\n{paragraph[num].getText()}\n')
                    break
                else:
                    print('retrying')
    except Exception as exc:
        print('There was a problem: %s' % (exc))

def mchanganyiko(lists):
    scrape_column(lists[0], 'mchanganyiko')

def mipya(lists):
    scrape_column(lists[1], 'mipya')

lists = soup.select('.col-sm-6')
mchanganyiko(lists)
mipya(lists)
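For what it's worth, the title and body picking above boils down to "take the first element whose text is long enough". A small sketch of that heuristic as one reusable helper; the 40- and 100-character thresholds come from the script above, while first_long_text itself is not part of it:

Python:
def first_long_text(elements, min_len):
    # Return the text of the first element longer than min_len characters,
    # falling back to the first element if none qualifies.
    for el in elements:
        if len(el.get_text()) > min_len:
            return el.get_text()
    return elements[0].get_text() if elements else ''

# With the selectors used in the script:
# title = first_long_text(soup.select('span strong'), 40)
# body = first_long_text(soup.select('p'), 100)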
Boss, at the part where you have
url = "http://www.ackyshine.com/vichekesho:_home"
put
url = "http://www.ackyshine.com/vichekesho:"
specifics('http://www.ackyshine.com/vichekesho:kweli-mitandao-imeharibu-watu-angalia-huyu-anacho')
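With url set to the bare "vichekesho:" prefix as suggested above, a specific page is just that prefix plus the page's slug, so the call can also be written like this (the slug is the one from the line above):

Python:
slug = 'kweli-mitandao-imeharibu-watu-angalia-huyu-anacho'
specifics(url + slug)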