# Scrape the food pages linked from the lanutrition.fr index page and export,
# for every food that exposes both values, its name, glycemic index and
# carbohydrate weight to export.json.
#
# Accents are preserved in the export: the file is written as UTF-8 and JSON
# escaping of non-ASCII characters is disabled.
import json
import urllib.request
import time
from urllib.parse import urljoin

from bs4 import BeautifulSoup

INDEX_URL = 'https://www.lanutrition.fr/les-aliments-a-la-loupe'

# Collect the absolute URL of every food page linked from the index.
links = []
with urllib.request.urlopen(INDEX_URL) as fp:
    content = fp.read()
soup = BeautifulSoup(content, 'html.parser')
# href=True skips anchors that have no target; they would otherwise produce a
# bogus "...None" URL.
for link in soup.find(id='nutriments_by_alpha').find_all('a', href=True):
    # urljoin resolves root-relative hrefs against the site and leaves
    # already-absolute hrefs untouched.
    links.append(urljoin(INDEX_URL, link['href']))


def extractCarbohydrates(soup):
    """Return the carbohydrate value found on a food page, or None.

    Looks up the ``div`` whose ``name`` attribute is ``glucides``, takes the
    text of its ``value-weight`` element and parses the part before the first
    space as a float. A decimal comma is accepted as well as a decimal point
    (NOTE(review): assumes the site may format numbers the French way).

    Returns None when any of the elements is missing or the text is not a
    number.
    """
    try:
        text = (soup
                .find('div', {"name": 'glucides'})
                .find(class_='value-weight')
                .get_text())
        return float(text.split(' ')[0].replace(',', '.'))
    except (AttributeError, ValueError):
        # AttributeError: a find() returned None (element absent).
        # ValueError: the text is not a parsable number.
        return None


def extractGlycemicIndex(soup):
    """Return the glycemic index found on a food page, or None.

    Reads the text of the ``nombre`` element nested in the ``aliment-poids``
    element and parses it as an integer. Returns None when an element is
    missing, the text is not an integer, or the value is not strictly
    positive.
    """
    try:
        n = int(soup
                .find(class_='aliment-poids')
                .find(class_='nombre')
                .get_text())
    except (AttributeError, ValueError):
        # AttributeError: a find() returned None (element absent).
        # ValueError: the text is not a parsable integer.
        return None
    return n if n > 0 else None


# Visit every food page and keep the foods for which all three fields exist.
# Progress is printed as the URL followed by '+' (kept) or '-' (skipped).
aliments = []
for link in links:
    print(link)
    time.sleep(0.5)  # pause between requests
    try:
        with urllib.request.urlopen(link) as fp:
            content = fp.read()
    except OSError as err:
        # URLError/HTTPError and socket timeouts are OSError subclasses; one
        # unreachable page must not discard everything collected so far.
        print(f'- {err}')
        continue
    soup = BeautifulSoup(content, 'html.parser')
    title = soup.find('h1', {"id": 'page-title'})
    name = title.get_text() if title is not None else None
    carbohydrates = extractCarbohydrates(soup)
    glycemicIndex = extractGlycemicIndex(soup)
    if name is not None and carbohydrates is not None and glycemicIndex is not None:
        print('+')
        aliments.append({
            "name": name,
            "glycemicIndex": glycemicIndex,
            "carbohydrates": carbohydrates
        })
    else:
        print('-')

# Explicit UTF-8 plus ensure_ascii=False keeps accented characters readable in
# the file instead of emitting \uXXXX escapes.
with open('export.json', 'w', encoding='utf-8') as f:
    json.dump(aliments, f, ensure_ascii=False)