diff options
author | 2024-05-06 19:20:40 -0700 | |
---|---|---|
committer | 2024-05-06 19:20:40 -0700 | |
commit | 9af98de9098d953ecabc42224c737a899a79c9a5 (patch) | |
tree | 46d13a905afaae163dfc873e05920ca7d57ab1bc | |
parent | 15a52ac151f815d8d88ae3e4fd7db7b8a9662eb2 (diff) | |
download | CS172-Project-9af98de9098d953ecabc42224c737a899a79c9a5.tar.gz CS172-Project-9af98de9098d953ecabc42224c737a899a79c9a5.tar.zst CS172-Project-9af98de9098d953ecabc42224c737a899a79c9a5.zip |
Fix small bug in scraper
-rw-r--r-- | scraper.py | 6 |
1 file changed, 3 insertions, 3 deletions
```diff
@@ -59,10 +59,10 @@ class ComputerScienceSpyder(CrawlSpider):
 
         yield {
             'Domain': result.url.split('/')[2],
-            'URL': result.url,
             'Title': result.css('title::text').get(),
-            'Content': content,
-            'Image URLs': '|'.join(image_urls),
+            'ParsedContent': content,
+            'ImageResourceLocator': '|'.join(image_urls),
+            'ResourceLocator': result.url,
         }
 
         file_size_bytes = os.path.getsize(OUTPUT_FILE)
```