Browse Source

Added cache for API responses

master
Johann Schmitz 2 years ago
parent
commit
66abb3995f
Signed by: Johann Schmitz <johann@j-schmitz.net> GPG Key ID: A084064277C501ED
3 changed files with 145 additions and 7 deletions
  1. 33
    0
      fuglu_safebrowsing/cache.py
  2. 44
    7
      fuglu_safebrowsing/lookup.py
  3. 68
    0
      tests/test_lookup.py

+ 33
- 0
fuglu_safebrowsing/cache.py View File

@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
import datetime


class SimpleCache(object):
    """Minimal in-memory cache keyed by URL with a per-entry expiry time.

    Each entry is stored in ``self._data`` as an ``(expiry, data)`` tuple,
    where ``expiry`` is a naive UTC ``datetime``. Expired entries are only
    removed when they are looked up; there is no background cleanup.
    """

    def __init__(self):
        # url -> (expiry datetime, cached data)
        self._data = {}

    def get(self, url):
        """Return the data cached for *url*, or ``None`` if absent or expired.

        An entry whose expiry time has been reached is deleted as a side
        effect of the lookup.
        """
        try:
            max_datetime, data = self._data[url]
        except KeyError:
            return None
        if max_datetime <= datetime.datetime.utcnow():
            # Stale entry: drop it so the dict does not keep dead data around.
            self._data.pop(url, None)
            return None
        return data

    def get_many(self, urls):
        """Return a list with the cached data of every live entry in *urls*.

        The order of *urls* is preserved; URLs without a live entry are
        skipped. Entries are filtered on ``is not None`` (the "miss" value
        of :meth:`get`) rather than on truthiness, so a cached but falsy
        payload such as ``{}`` is still returned.
        """
        found = []
        for u in urls:
            x = self.get(u)
            if x is not None:
                found.append(x)
        return found

    def add(self, url, data, timeout):
        """Cache *data* for *url* for *timeout* seconds from now.

        An existing entry for the same URL is overwritten.
        """
        dt = datetime.datetime.utcnow() + datetime.timedelta(seconds=timeout)
        self._data[url] = (dt, data)

+ 44
- 7
fuglu_safebrowsing/lookup.py View File

@@ -1,8 +1,10 @@
# -*- coding: utf-8 -*-
import re
import requests
from fuglu.shared import ScannerPlugin, DUNNO, string_to_actioncode

from fuglu_safebrowsing import VERSION
from fuglu_safebrowsing.cache import SimpleCache

DOMAINMAGIC_AVAILABLE = False
try:
@@ -31,7 +33,7 @@ class SafebrowsingLookupPlugin(ScannerPlugin):
'description': 'Default action to take on positive result',
}
}
self.cache = SimpleCache()
self.logger = self._logger()
@property
@@ -113,6 +115,10 @@ class SafebrowsingLookupPlugin(ScannerPlugin):
if not (self.api_key and urls):
return

cached_data = self.cache.get_many(urls)
self.logger.info("Found %s cached results", len(cached_data))
cached_urls = [x['threat']['url'] for x in cached_data]

request_data = {
'client': {
'clientId': 'fuglu-safebrowsing',
@@ -122,20 +128,51 @@ class SafebrowsingLookupPlugin(ScannerPlugin):
"threatTypes": self.threat_types,
"platformTypes": self.threat_platforms,
"threatEntryTypes": ["URL"],
"threatEntries": [{"url": u} for u in urls]
"threatEntries": [{"url": u} for u in urls if u not in cached_urls]
}
}
try:
response = requests.post(SAFEBROWSING_API_URL + '?key=%s' % self.api_key,
json=request_data,
**self.requests_kwargs)
response.raise_for_status()
data = {
'matches': []
}
return response.json()
if request_data['threatInfo']['threatEntries']:
response = requests.post(SAFEBROWSING_API_URL + '?key=%s' % self.api_key,
json=request_data,
**self.requests_kwargs)
response.raise_for_status()
data = response.json()

if data and 'matches' in data:
for match in data['matches']:
if "cacheDuration" not in match:
continue
cache_duration = self.cache_duration_to_seconds(match['cacheDuration'])
if not cache_duration:
continue
url = match['threat']['url']
self.cache.add(url, match, cache_duration)
data['matches'].extend(cached_data)
return data
except:
self.logger.exception("Request to Safebrowsing API failed")

def cache_duration_to_seconds(self, duration_s):
    """Convert a duration string such as ``"300.000s"`` to whole seconds.

    The string must start with an integer number of seconds, optionally
    followed by a fractional part, and then the unit suffix ``s``
    (case-insensitive). The fractional part is discarded, not rounded.

    :param duration_s: duration string, or a falsy value (``None``, ``""``)
    :return: the integer seconds, or ``0`` when the value is empty or does
             not match the expected format
    """
    if not duration_s:
        return 0
    # Raw string avoids invalid-escape warnings for ``\d``; the fraction is
    # optional so whole-second values like "300s" are accepted as well.
    m = re.match(r"^(\d+)(?:\.\d+)?s", duration_s, re.IGNORECASE)
    if m:
        return int(m.group(1))
    return 0

def extract_urls(self, suspect):
extractor = domainmagic.extractor.URIExtractor()
textparts = " ".join(self.get_decoded_textparts(suspect.get_message_rep()))

+ 68
- 0
tests/test_lookup.py View File

@@ -152,7 +152,75 @@ class TestLookup(object):
assert plugin.examine(suspect) == DELETE

@mock.patch("fuglu_safebrowsing.lookup.requests")
def test_check_safebrowsing_cached(self, requests_mock, plugin, suspect):
    """A URL seeded into the plugin cache is returned without any POST."""
    url = "http://example.com"
    metadata = {
        "entries": [{
            "key": "malware_threat_type",
            "value": "landing"
        }]
    }
    cached_match = {
        "threatType": "MALWARE",
        "platformType": "WINDOWS",
        "threatEntryType": "URL",
        "threat": {"url": url},
        "threatEntryMetadata": metadata,
        "cacheDuration": "300.000s"
    }
    # Seed the cache directly so the lookup has nothing left to request.
    plugin.cache.add(url, cached_match, 300)

    result = plugin.check_safebrowsing([url])

    assert result == {'matches': [cached_match]}
    requests_mock.post.assert_not_called()

@mock.patch("fuglu_safebrowsing.lookup.requests")
def test_check_safebrowsing_positive_result_cache(self, requests_mock, plugin, suspect):
    """A match returned by the mocked API is reused on the second lookup.

    The first call must hit ``requests.post``; the second call must return
    the same result while the total number of POSTs stays at one.
    """
    url = "http://example.com"
    match = {
        "threatType": "MALWARE",
        "platformType": "WINDOWS",
        "threatEntryType": "URL",
        "threat": {"url": url},
        "threatEntryMetadata": {
            "entries": [{
                "key": "malware_threat_type",
                "value": "landing"
            }]
        },
        "cacheDuration": "300.000s"
    }
    expected = {'matches': [match]}

    # The mocked API response gets its own payload dict, separate from `expected`.
    api_response = mock.MagicMock()
    api_response.json = mock.MagicMock(return_value={"matches": [match]})
    requests_mock.post = mock.MagicMock(return_value=api_response)

    first_result = plugin.check_safebrowsing([url])
    assert first_result == expected
    requests_mock.post.assert_called()

    # Query again to verify that the cache is used instead of the API.
    second_result = plugin.check_safebrowsing([url])
    assert second_result == expected
    requests_mock.post.assert_called_once()

def test_extract_urls(self, plugin, suspect):
    """The fixture suspect yields exactly one extracted URL."""
    expected_urls = [
        "http://diechatburg.de/media/editors/tinymce/plugins/advlist/"
    ]
    extracted = plugin.extract_urls(suspect)
    assert extracted == expected_urls

def test_cache_duration_to_seconds(self, plugin):
    """Check duration parsing for empty, malformed and well-formed input."""
    cases = (
        # empty or missing unit suffix -> 0
        (None, 0),
        ("", 0),
        ("123", 0),
        ("123.0", 0),
        # well-formed values -> integer part of the seconds
        ("300.0s", 300),
        ("300.123s", 300),
    )
    for raw, seconds in cases:
        assert plugin.cache_duration_to_seconds(raw) == seconds

Loading…
Cancel
Save