diff options
Diffstat (limited to 'lib/HttpCachingBridgeAbstract.php')
-rw-r--r-- | lib/HttpCachingBridgeAbstract.php | 89 |
1 files changed, 89 insertions, 0 deletions
diff --git a/lib/HttpCachingBridgeAbstract.php b/lib/HttpCachingBridgeAbstract.php new file mode 100644 index 00000000..15cddc5b --- /dev/null +++ b/lib/HttpCachingBridgeAbstract.php @@ -0,0 +1,89 @@ +<?php +require_once(__DIR__ . '/BridgeInterface.php'); +/** + * Extension of BridgeAbstract allowing caching of files downloaded over http. + * TODO allow file cache invalidation by touching files on access, and removing + * files/directories which have not been touched since ... a long time + */ +abstract class HttpCachingBridgeAbstract extends BridgeAbstract { + + /** + * Maintain locally cached versions of pages to download, to avoid multiple downloads. + * @param url url to cache + * @return content of the file as string + */ + public function get_cached($url){ + // TODO build this from the variable given to Cache + $cacheDir = __DIR__ . '/../cache/pages/'; + $filepath = $this->buildCacheFilePath($url, $cacheDir); + + if(file_exists($filepath)){ + $this->debugMessage('loading cached file from ' . $filepath . ' for page at url ' . $url); + // TODO touch file and its parent, and try to do neighbour deletion + $this->refresh_in_cache($cacheDir, $filepath); + $content = file_get_contents($filepath); + } else { + $this->debugMessage('we have no local copy of ' . $url . ' Downloading to ' . $filepath); + $dir = substr($filepath, 0, strrpos($filepath, '/')); + + if(!is_dir($dir)){ + $this->debugMessage('creating directories for ' . $dir); + mkdir($dir, 0777, true); + } + + $content = $this->getContents($url); + if($content !== false){ + file_put_contents($filepath, $content); + } + } + + return str_get_html($content); + } + + public function get_cached_time($url){ + // TODO build this from the variable given to Cache + $cacheDir = __DIR__ . '/../cache/pages/'; + $filepath = $this->buildCacheFilePath($url, $cacheDir); + + if(!file_exists($filepath)){ + $this->get_cached($url); + } + + return filectime($filepath); + } + + private function refresh_in_cache($cacheDir, $filepath){ + $currentPath = $filepath; + while(!$cacheDir == $currentPath){ + touch($currentPath); + $currentPath = dirname($currentPath); + } + } + + private function buildCacheFilePath($url, $cacheDir){ + $simplified_url = str_replace( + ['http://', 'https://', '?', '&', '='], + ['', '', '/', '/', '/'], + $url); + + if(substr($cacheDir, -1) !== '/'){ + $cacheDir .= '/'; + } + + $filepath = $cacheDir . $simplified_url; + + if(substr($filepath, -1) === '/'){ + $filepath .= 'index.html'; + } + + return $filepath; + } + + public function remove_from_cache($url){ + // TODO build this from the variable given to Cache + $cacheDir = __DIR__ . '/../cache/pages/'; + $filepath = $this->buildCacheFilePath($url, $cacheDir); + $this->debugMessage('removing from cache \'' . $filepath . '\''); + unlink($filepath); + } +} |