author     Corentin Chary <corentin.chary@gmail.com>  2012-08-03 21:50:54 +0200
committer  Corentin Chary <corentin.chary@gmail.com>  2012-08-03 21:50:54 +0200
commit     8d912379886e00815aeb7ea1aed6af8f4bb62fbc (patch)
tree       db80a5ddd84cf1e4dbf5bf79cc450478e6fd30de /pym
parent     euscan: hopefully better CPAN gentoo version mangling (diff)
euscan/handlers: rework handlers, better remote-id + watch support
Signed-off-by: Corentin Chary <corentin.chary@gmail.com>
Diffstat (limited to 'pym')
-rw-r--r--  pym/euscan/handlers/__init__.py | 190
-rw-r--r--  pym/euscan/handlers/cpan.py (renamed from pym/euscan/handlers/url/cpan.py) | 63
-rw-r--r--  pym/euscan/handlers/generic.py (renamed from pym/euscan/handlers/url/generic.py) | 15
-rw-r--r--  pym/euscan/handlers/github.py (renamed from pym/euscan/handlers/url/github.py) | 18
-rw-r--r--  pym/euscan/handlers/kde.py (renamed from pym/euscan/handlers/url/kde.py) | 17
-rw-r--r--  pym/euscan/handlers/package/__init__.py | 19
-rw-r--r--  pym/euscan/handlers/package/remote_id.py | 44
-rw-r--r--  pym/euscan/handlers/package/watch.py | 139
-rw-r--r--  pym/euscan/handlers/pear.py | 11
-rw-r--r--  pym/euscan/handlers/pecl.py | 11
-rw-r--r--  pym/euscan/handlers/php.py (renamed from pym/euscan/handlers/url/php.py) | 35
-rw-r--r--  pym/euscan/handlers/pypi.py (renamed from pym/euscan/handlers/url/pypi.py) | 24
-rw-r--r--  pym/euscan/handlers/rubygems.py (renamed from pym/euscan/handlers/url/rubygems.py) | 22
-rw-r--r--  pym/euscan/handlers/url.py | 98
-rw-r--r--  pym/euscan/handlers/url/__init__.py | 19
-rw-r--r--  pym/euscan/helpers.py | 115
-rw-r--r--  pym/euscan/mangling.py | 163
-rw-r--r--  pym/euscan/scan.py | 18
18 files changed, 512 insertions(+), 509 deletions(-)
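
For context, the reworked get_metadata() in handlers/__init__.py (first diff below) builds per-handler options from <watch> and <remote-id> elements in metadata.xml. A hypothetical entry using only the attributes the new code reads (package name, URL and mangling rules are invented for illustration) could look like:

    <upstream>
      <watch versionmangle="s/-rc/_rc/;gentoo">http://example.org/dist/foo-([\d.]+)\.tar\.gz</watch>
      <remote-id type="pypi">foo</remote-id>
    </upstream>

get_metadata() folds this into a dict keyed by handler name, roughly {'url': [{'type': 'url', 'data': ..., 'versionmangle': ['s/-rc/_rc/', 'gentoo']}], 'pypi': [{'type': 'pypi', 'data': 'foo'}]}, which scan() then hands to the matching package handlers, falling back to URL handlers when none match.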
diff --git a/pym/euscan/handlers/__init__.py b/pym/euscan/handlers/__init__.py
index 899ef24..018d095 100644
--- a/pym/euscan/handlers/__init__.py
+++ b/pym/euscan/handlers/__init__.py
@@ -1,48 +1,124 @@
-import sys
-from euscan import CONFIG, output
-from euscan.handlers.package import handlers as pkg_handlers
-from euscan.handlers.url import handlers as url_handlers
-
+import os, sys
+import pkgutil
-def find_best_pkg_handler(pkg):
+from euscan import CONFIG, output
+import euscan.mangling
+
+from gentoolkit.metadata import MetaData
+
+handlers = {'package' : [], 'url' : [], 'all' : {}}
+
+# autoimport all modules in this directory and append them to handlers list
+for loader, module_name, is_pkg in pkgutil.walk_packages(__path__):
+ module = loader.find_module(module_name).load_module(module_name)
+ if not hasattr(module, 'HANDLER_NAME'):
+ continue
+ if hasattr(module, 'scan_url'):
+ handlers['url'].append(module)
+ if hasattr(module, 'scan_pkg'):
+ handlers['package'].append(module)
+ handlers['all'][module.HANDLER_NAME] = module
+
+# sort handlers by priority
+def sort_handlers(handlers):
+ return sorted(
+ handlers,
+ key=lambda handler: handler.PRIORITY,
+ reverse=True
+ )
+
+handlers['package'] = sort_handlers(handlers['package'])
+handlers['url'] = sort_handlers(handlers['url'])
+
+def find_best_handler(kind, pkg, *args):
"""
Find the best handler for the given package
"""
- for handler in pkg_handlers:
- if handler.can_handle(pkg):
+ for handler in handlers[kind]:
+ if handler.can_handle(pkg, *args):
return handler
return None
+def find_handlers(kind, names):
+ ret = []
-def find_best_url_handler(pkg, url):
- """
- Find the best handler for the given url
- """
- for handler in url_handlers:
- if handler.can_handle(pkg, url):
- return handler
- return None
+ for name in names:
+        # Does this handler exist and handle this kind of thing (pkg / url)?
+ if name in handlers['all'] and handlers['all'][name] in handlers[kind]:
+ ret.append(handlers['all'][name])
+ return ret
-def scan(pkg, urls, on_progress=None):
- """
- Scans upstream for the given package.
- First tries if a package wide handler is available, then fallbacks
- in url handling.
- """
- pkg_handler = find_best_pkg_handler(pkg)
- if pkg_handler:
- if on_progress:
- on_progress(increment=35)
+def get_metadata(pkg):
+ metadata = {}
+
+ pkg_metadata = None
+
+ meta_override = os.path.join('metadata', pkg.category, pkg.name, 'metadata.xml')
+
+ try:
+ if os.path.exists(meta_override):
+ pkg_metadata = MetaData(meta_override)
+ output.einfo('Using custom metadata: %s' % meta_override)
+ if not pkg_metadata:
+ pkg_metadata = pkg.metadata
+ except Exception, e:
+ output.ewarn('Error when fetching metadata: %s' % str(e))
+
+ if not pkg_metadata:
+ return {}
+
+ # Support multiple remote-id and multiple watch
+ for upstream in pkg_metadata._xml_tree.findall("upstream"):
+ for node in upstream.findall("watch"):
+ options = dict(node.attrib)
+ options['data'] = node.text
+
+ if "type" in options:
+ handler = options['type']
+ else:
+ handler = "url"
+ options['type'] = "url"
+
+ for key in ["versionmangle", "downloadurlmangle"]:
+ value = options.get(key, None)
+ if value:
+ options[key] = value.split(";")
+
+ if handler not in metadata:
+ metadata[handler] = []
+ metadata[handler].append(options)
+
+ for upstream in pkg_metadata._xml_tree.findall("upstream"):
+ for node in upstream.findall("remote-id"):
+ handler = node.attrib.get("type")
+ if not handler:
+ continue
+ if handler in metadata:
+ for i in range(len(metadata[handler])):
+ if not metadata[handler][i]['data']:
+ metadata[handler][i]['data'] = node.text
+ else:
+ metadata[handler] = [{'type' : handler, 'data' : node.text }]
+
+ return metadata
+
+def scan_pkg(pkg_handler, pkg, options, on_progress=None):
+ versions = []
- if not CONFIG['quiet'] and not CONFIG['format']:
- sys.stdout.write("\n")
+ if on_progress:
+ on_progress(increment=35)
+
+ for o in options:
+ versions += pkg_handler.scan_pkg(pkg, o)
- versions = pkg_handler.scan(pkg)
+ if on_progress:
+ on_progress(increment=35)
- if on_progress:
- on_progress(increment=35)
- return versions
+ return versions
+
+def scan_url(pkg, urls, options, on_progress=None):
+ versions = []
if on_progress:
progress_available = 70
@@ -52,16 +128,12 @@ def scan(pkg, urls, on_progress=None):
else:
progress_increment = 0
- versions = []
-
for filename in urls:
for url in urls[filename]:
if on_progress and progress_available > 0:
on_progress(increment=progress_increment)
progress_available -= progress_increment
- if not CONFIG['quiet'] and not CONFIG['format']:
- sys.stdout.write("\n")
output.einfo("SRC_URI is '%s'" % url)
if '://' not in url:
@@ -69,8 +141,9 @@ def scan(pkg, urls, on_progress=None):
continue
try:
- url_handler = find_best_url_handler(pkg, url)
- versions.extend(url_handler.scan(pkg, url))
+ url_handler = find_best_handler('url', pkg, url)
+ for o in options:
+ versions += url_handler.scan_url(pkg, url, o)
except Exception as e:
output.ewarn(
"Handler failed: [%s] %s" %
@@ -84,3 +157,44 @@ def scan(pkg, urls, on_progress=None):
on_progress(increment=progress_available)
return versions
+
+def scan(pkg, urls, on_progress=None):
+ """
+ Scans upstream for the given package.
+    Tries package-wide handlers first, then falls back to URL-based handlers.
+ """
+
+ if not CONFIG['quiet'] and not CONFIG['format']:
+ sys.stdout.write('\n')
+
+ metadata = get_metadata(pkg)
+ versions = []
+
+ pkg_handlers = find_handlers('package', metadata.keys())
+ if not pkg_handlers:
+ pkg_handler = find_best_handler('package', pkg)
+ if pkg_handler: pkg_handlers = [pkg_handler]
+
+ for pkg_handler in pkg_handlers:
+ options = metadata.get(pkg_handler.HANDLER_NAME, [{}])
+ versions += scan_pkg(pkg_handler, pkg, options, on_progress)
+
+ if not pkg_handlers:
+ versions += scan_url(pkg, urls, [{}], on_progress)
+
+ return versions
+
+def mangle(kind, name, string):
+ if name not in handlers['all']:
+ return None
+ handler = handlers['all'][name]
+ if not hasattr(handler, 'mangle_%s' % kind):
+ return None
+ return getattr(handler, 'mangle_%s' % kind)(string)
+
+def mangle_url(name, string):
+ return mangle('url', name, string)
+
+def mangle_version(name, string):
+ return mangle('version', name, string)
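
With the pkgutil autoimport above, any module in pym/euscan/handlers/ that defines HANDLER_NAME is registered as a URL handler (if it has scan_url), a package handler (if it has scan_pkg), or both. A minimal sketch of such a module, with an invented name and mirror prefix, might be:

    # hypothetical pym/euscan/handlers/example.py
    HANDLER_NAME = "example"
    CONFIDENCE = 100
    PRIORITY = 90

    def can_handle(pkg, url=None):
        # package handlers are probed with only pkg, URL handlers with pkg + url
        return url is not None and url.startswith('mirror://example/')

    def scan_pkg(pkg, options):
        # options is one entry from get_metadata(), e.g. {'type': 'example', 'data': 'foo'}
        # return a list of (url, version, HANDLER_NAME, CONFIDENCE) tuples
        return []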
diff --git a/pym/euscan/handlers/url/cpan.py b/pym/euscan/handlers/cpan.py
index 6b9ad52..a184e10 100644
--- a/pym/euscan/handlers/url/cpan.py
+++ b/pym/euscan/handlers/cpan.py
@@ -3,7 +3,7 @@ import portage
import urllib2
import json
-from euscan import helpers, output
+from euscan import helpers, output, mangling
HANDLER_NAME = "cpan"
CONFIDENCE = 100
@@ -11,10 +11,8 @@ PRIORITY = 90
_cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*")
-
-def can_handle(pkg, url):
- return url.startswith('mirror://cpan/')
-
+def can_handle(pkg, url=None):
+ return url and url.startswith('mirror://cpan/')
def guess_package(cp, url):
match = _cpan_package_name_re.search(url)
@@ -33,7 +31,7 @@ def guess_package(cp, url):
return pkg
-def gentoo_mangle_version(up_pv):
+def mangle_version(up_pv):
# clean
up_pv = up_pv.replace("._", "_") # e.g.: 0.999._002 -> 0.999_002
up_pv = up_pv.replace("_0.", "_") # e.g.: 0.30_0.1 -> 0.30_1
@@ -68,53 +66,25 @@ def gentoo_mangle_version(up_pv):
if rc_part:
pv = "%s_rc" % pv
- return helpers.gentoo_mangle_version(pv)
-
-
-def cpan_trim_version(pv):
- pv = re.sub('^[a-zA-Z]+', '', pv)
- pv = re.sub('[a-zA-Z]$', '', pv)
return pv
-
-def cpan_mangle_version(pv):
- pos = pv.find('.')
- if pos < 0:
- return pv
- up_pv = pv.replace('.', '')
- up_pv = up_pv[0:pos] + '.' + up_pv[pos:]
- up_pv = cpan_trim_version(up_pv)
- return up_pv
-
-
-def cpan_vercmp(cp, a, b):
- try:
- return float(a) - float(b)
- except:
- if a < b:
- return -1
- else:
- return 1
-
-
-def scan(pkg, url):
+def scan_url(pkg, url, options):
cp, ver, rev = portage.pkgsplit(pkg.cpv)
remote_pkg = guess_package(cp, url)
output.einfo("Using CPAN API: %s", remote_pkg)
- result = scan_remote(pkg, [remote_pkg])
+ return scan_pkg(pkg, {'data' : remote_pkg})
- ret = []
- for url, pv in result:
- ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
- return ret
+def scan_pkg(pkg, options):
+ remote_pkg = options['data']
+ # Defaults to CPAN mangling rules
+ if 'versionmangle' not in options:
+ options['versionmangle'] = ['cpan', 'gentoo']
-def scan_remote(pkg, remote_data):
- remote_pkg = remote_data[0]
url = 'http://search.cpan.org/api/dist/%s' % remote_pkg
- cp, ver, rev = portage.pkgsplit(pkg.cpv)
+ cp, ver, rev = pkg.cp, pkg.version, pkg.revision
try:
fp = helpers.urlopen(url)
@@ -139,11 +109,9 @@ def scan_remote(pkg, remote_data):
# continue
up_pv = version['version']
- up_pv = cpan_trim_version(up_pv)
- pv = gentoo_mangle_version(up_pv)
- up_ver = cpan_mangle_version(ver)
+ pv = mangling.mangle_version(up_pv, options)
- if helpers.version_filtered(cp, up_ver, up_pv, cpan_vercmp):
+ if helpers.version_filtered(cp, ver, pv):
continue
url = 'mirror://cpan/authors/id/%s/%s/%s/%s' % (
@@ -153,6 +121,7 @@ def scan_remote(pkg, remote_data):
version['archive']
)
- ret.append((url, pv))
+ url = mangling.mangle_url(url, options)
+ ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
return ret
diff --git a/pym/euscan/handlers/url/generic.py b/pym/euscan/handlers/generic.py
index 3ba7ac0..76f598f 100644
--- a/pym/euscan/handlers/url/generic.py
+++ b/pym/euscan/handlers/generic.py
@@ -12,7 +12,7 @@ except ImportError:
import portage
from euscan import CONFIG, SCANDIR_BLACKLIST_URLS, \
- BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output, helpers
+ BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output, helpers, mangling
HANDLER_NAME = "generic"
CONFIDENCE = 45
@@ -69,6 +69,7 @@ def scan_html(data, url, pattern):
(".".join([x for x in match.groups() if x is not None]),
match.group(0))
)
+
return results
@@ -87,7 +88,7 @@ def scan_ftp(data, url, pattern):
return results
-def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
+def scan_directory_recursive(cp, ver, rev, url, steps, orig_url, options):
if not steps:
return []
@@ -120,7 +121,8 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
versions = []
for up_pv, path in results:
- pv = helpers.gentoo_mangle_version(up_pv)
+ pv = mangling.mangle_version(up_pv, options)
+
if helpers.version_filtered(cp, ver, pv):
continue
if not url.endswith("/"):
@@ -129,16 +131,17 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
if not steps and path not in orig_url:
confidence = confidence_score(path, orig_url)
+ path = mangling.mangle_url(path, options)
versions.append((path, pv, HANDLER_NAME, confidence))
if steps:
- ret = scan_directory_recursive(cp, ver, rev, path, steps, orig_url)
+ ret = scan_directory_recursive(cp, ver, rev, path, steps, orig_url, options)
versions.extend(ret)
return versions
-def scan(pkg, url):
+def scan_url(pkg, url, options):
if CONFIG["scan-dir"]:
for bu in SCANDIR_BLACKLIST_URLS:
if re.match(bu, url):
@@ -171,7 +174,7 @@ def scan(pkg, url):
output.einfo("Scanning: %s" % template)
steps = helpers.generate_scan_paths(template)
- ret = scan_directory_recursive(cp, ver, rev, "", steps, url)
+ ret = scan_directory_recursive(cp, ver, rev, "", steps, url, options)
if not ret:
ret = brute_force(pkg, url)
diff --git a/pym/euscan/handlers/url/github.py b/pym/euscan/handlers/github.py
index dc5dd16..dfe2cee 100644
--- a/pym/euscan/handlers/url/github.py
+++ b/pym/euscan/handlers/github.py
@@ -4,16 +4,15 @@ import re
import portage
-from euscan import helpers, output
+from euscan import helpers, output, mangling
HANDLER_NAME = "github"
CONFIDENCE = 100
PRIORITY = 90
-def can_handle(pkg, url):
- return url.startswith('mirror://github/')
-
+def can_handle(pkg, url=None):
+ return url and url.startswith('mirror://github/')
def guess_package(cp, url):
match = re.search('^mirror://github/(.*?)/(.*?)/(.*)$', url)
@@ -21,8 +20,7 @@ def guess_package(cp, url):
assert(match)
return (match.group(1), match.group(2), match.group(3))
-
-def scan(pkg, url):
+def scan_url(pkg, url, options):
'http://developer.github.com/v3/repos/downloads/'
user, project, filename = guess_package(pkg.cpv, url)
@@ -38,7 +36,8 @@ def scan(pkg, url):
fnre = re.compile('^%s$' % \
re.escape(filename).replace(re.escape(ver), '(.*?)'))
- output.einfo("Using github API for: " + '/'.join(filename))
+ output.einfo("Using github API for: project=%s user=%s filename=%s" % \
+ (project, user, filename))
dlreq = urllib2.urlopen('https://api.github.com/repos/%s/%s/downloads' % \
(user, project))
@@ -49,9 +48,10 @@ def scan(pkg, url):
m = fnre.match(dl['name'])
if m:
- pv = helpers.gentoo_mangle_version(m.group(1))
+ pv = mangling.mangle_version(m.group(1), options)
if helpers.version_filtered(cp, ver, pv):
continue
- ret.append((dl['html_url'], pv, HANDLER_NAME, CONFIDENCE))
+ url = mangling.mangle_url(dl['html_url'], options)
+ ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
return ret
diff --git a/pym/euscan/handlers/url/kde.py b/pym/euscan/handlers/kde.py
index 5535158..b789b88 100644
--- a/pym/euscan/handlers/url/kde.py
+++ b/pym/euscan/handlers/kde.py
@@ -1,4 +1,4 @@
-from euscan.handlers.url import generic
+from euscan.handlers import generic
PRIORITY = 90
@@ -6,10 +6,7 @@ HANDLER_NAME = "kde"
def can_handle(pkg, url):
- if url.startswith('mirror://kde/'):
- return True
- return False
-
+ return url and url.startswith('mirror://kde/')
def clean_results(results):
ret = []
@@ -22,18 +19,18 @@ def clean_results(results):
return ret
-def scan(pkg, url):
+def scan_url(pkg, url):
results = generic.scan(pkg.cpv, url)
- if url.startswith('mirror://kde/unstable/'):
- url = url.replace('mirror://kde/unstable/', 'mirror://kde/stable/')
+    if url.startswith('mirror://kde/unstable/'):
+        url = url.replace('mirror://kde/unstable/', 'mirror://kde/stable/')
results += generic.scan(pkg.cpv, url)
if not results: # if nothing was found go brute forcing
results = generic.brute_force(pkg.cpv, url)
- if url.startswith('mirror://kde/unstable/'):
- url = url.replace('mirror://kde/unstable/', 'mirror://kde/stable/')
+    if url.startswith('mirror://kde/unstable/'):
+        url = url.replace('mirror://kde/unstable/', 'mirror://kde/stable/')
results += generic.brute_force(pkg.cpv, url)
return clean_results(results)
diff --git a/pym/euscan/handlers/package/__init__.py b/pym/euscan/handlers/package/__init__.py
deleted file mode 100644
index 8530b10..0000000
--- a/pym/euscan/handlers/package/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-"""
-Package wide handlers for scanning upstream
-"""
-
-import pkgutil
-
-handlers = []
-
-# autoimport all modules in this directory and append them to handlers list
-for loader, module_name, is_pkg in pkgutil.walk_packages(__path__):
- module = loader.find_module(module_name).load_module(module_name)
- handlers.append(module)
-
-# sort handlers by priority
-handlers = sorted(
- handlers,
- key=lambda handler: handler.PRIORITY,
- reverse=True
-)
diff --git a/pym/euscan/handlers/package/remote_id.py b/pym/euscan/handlers/package/remote_id.py
deleted file mode 100644
index 0615526..0000000
--- a/pym/euscan/handlers/package/remote_id.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from euscan.handlers.url import handlers
-from euscan import output
-
-PRIORITY = 100
-
-HANDLER_NAME = "remote_id"
-CONFIDENCE = 100.0
-
-
-url_handlers = {handler.HANDLER_NAME: handler for handler in handlers}
-
-
-def can_handle(pkg):
- # Return True if there's at least one remote-id that can be
- # handled by euscan
- try:
- remoteids = pkg.metadata.upstream()[0].upstream_remoteids()
- except IndexError:
- pass
- else:
- if len(remoteids) > 0:
- for remote_value, remote_type in remoteids:
- if remote_type in url_handlers:
- return True
- return False
-
-
-def scan(pkg):
- output.einfo("Using remote-id data")
-
- ret = []
-
- remoteids = pkg.metadata.upstream()[0].upstream_remoteids()
- for remote_value, remote_type in remoteids:
- if remote_type in url_handlers:
- remote_data = remote_value.split("/")
- scan_remote = getattr(
- url_handlers[remote_type], "scan_remote", None
- )
- if scan_remote:
- for url, pv in scan_remote(pkg, remote_data):
- name = "%s, %s" % (HANDLER_NAME, remote_type)
- ret.append((url, pv, name, CONFIDENCE))
- return ret
diff --git a/pym/euscan/handlers/package/watch.py b/pym/euscan/handlers/package/watch.py
deleted file mode 100644
index 14f25d2..0000000
--- a/pym/euscan/handlers/package/watch.py
+++ /dev/null
@@ -1,139 +0,0 @@
-import re
-import urllib2
-
-import portage
-
-from euscan.handlers.url import generic
-from euscan import output, helpers
-
-PRIORITY = 100
-
-HANDLER_NAME = "watch"
-CONFIDENCE = 100.0
-
-
-is_pattern = r"\([^\/]+\)"
-
-
-def can_handle(pkg):
- try:
- return pkg.metadata._xml_tree.find("upstream").find("watch") \
- is not None
- except AttributeError:
- return False
-
-
-def parse_mangles(mangles, string):
- for mangle in mangles:
- # convert regex from perl format to python format
- # there are some regex in this format: s/pattern/replacement/
- m = re.match(r"s/(.*[^\\])/(.*)/", mangle)
- if not m:
- # or in this format s|pattern|replacement|
- m = re.match(r"s\|(.*[^\\])\|(.*)\|", mangle)
- pattern, repl = m.groups()
- repl = re.sub(r"\$(\d+)", r"\\\1", repl)
- string = re.sub(pattern, repl, string)
- return string
-
-
-def clean_results(results, versionmangle, urlmangle):
- ret = []
-
- for path, version, _, _ in results:
- version = parse_mangles(versionmangle, version)
- path = parse_mangles(urlmangle, path)
- ret.append((path, version, HANDLER_NAME, CONFIDENCE))
-
- return ret
-
-
-def parse_watch(pkg):
- for watch_tag in pkg.metadata._xml_tree.find("upstream").findall("watch"):
- try:
- base, file_pattern = watch_tag.text.split(" ")[:2]
- except ValueError:
- base, file_pattern = watch_tag.text, None
-
- # the file pattern can be in the base url
- pattern_regex = r"/([^/]*\([^/]*\)[^/]*)$"
- match = re.search(pattern_regex, base)
- if match:
- file_pattern = match.group(1)
- base = base.replace(file_pattern, "")
-
- # handle sf.net specially
- base = base.replace(
- "http://sf.net/", "http://qa.debian.org/watch/sf.php/"
- )
-
- vmangle = watch_tag.attrib.get("uversionmangle", None) or \
- watch_tag.attrib.get("versionmangle", None)
- versionmangle = vmangle.split(";") if vmangle else []
-
- umangle = watch_tag.attrib.get("downloadurlmangle", None)
- urlmangle = umangle.split(";") if umangle else []
-
- yield (base, file_pattern, versionmangle, urlmangle)
-
-
-def handle_directory_patterns(base, file_pattern):
- """
- Directory pattern matching
- e.g.: base: ftp://ftp.nessus.org/pub/nessus/nessus-([\d\.]+)/src/
- file_pattern: nessus-core-([\d\.]+)\.tar\.gz
- """
- splitted = base.split("/")
- i = 0
- basedir = []
- for elem in splitted:
- if re.search(is_pattern, elem):
- break
- basedir.append(elem)
- i += 1
- basedir = "/".join(basedir)
- directory_pattern = splitted[i]
- final = "/".join(splitted[i + 1:])
-
- try:
- fp = helpers.urlopen(basedir)
- except urllib2.URLError:
- return []
- except IOError:
- return []
-
- if not fp:
- return []
-
- data = fp.read()
-
- if basedir.startswith("ftp://"):
- scan_data = generic.scan_ftp(data, basedir, directory_pattern)
- else:
- scan_data = generic.scan_html(data, basedir, directory_pattern)
-
- return [("/".join((basedir, path, final)), file_pattern)
- for _, path in scan_data]
-
-
-def scan(pkg):
- output.einfo("Using watch data")
-
- cp, ver, rev = portage.pkgsplit(pkg.cpv)
-
- results = []
- for base, file_pattern, versionmangle, urlmangle in parse_watch(pkg):
- if not re.search(is_pattern, base):
- steps = [(base, file_pattern)]
- res = generic.scan_directory_recursive(
- cp, ver, rev, "", steps, base
- )
- else:
- res = []
- for step in handle_directory_patterns(base, file_pattern):
- res += generic.scan_directory_recursive(
- cp, ver, rev, "", [step], base
- )
-
- results += clean_results(res, versionmangle, urlmangle)
- return results
diff --git a/pym/euscan/handlers/pear.py b/pym/euscan/handlers/pear.py
new file mode 100644
index 0000000..2074e33
--- /dev/null
+++ b/pym/euscan/handlers/pear.py
@@ -0,0 +1,11 @@
+from euscan.handlers import php
+
+HANDLER_NAME = "pear"
+CONFIDENCE = 100
+PRIORITY = 90
+
+def can_handle(pkg, url=None):
+ return url and url.startswith('http://%s.php.net/get/' % HANDLER_NAME)
+
+scan_url = php.scan_url
+scan_pkg = php.scan_pkg
diff --git a/pym/euscan/handlers/pecl.py b/pym/euscan/handlers/pecl.py
new file mode 100644
index 0000000..cf372d2
--- /dev/null
+++ b/pym/euscan/handlers/pecl.py
@@ -0,0 +1,11 @@
+from euscan.handlers import php
+
+HANDLER_NAME = "pecl"
+CONFIDENCE = 100
+PRIORITY = 90
+
+def can_handle(pkg, url=None):
+ return url and url.startswith('http://%s.php.net/get/' % HANDLER_NAME)
+
+scan_url = php.scan_url
+scan_pkg = php.scan_pkg
diff --git a/pym/euscan/handlers/url/php.py b/pym/euscan/handlers/php.py
index d0fef71..1a0117a 100644
--- a/pym/euscan/handlers/url/php.py
+++ b/pym/euscan/handlers/php.py
@@ -3,23 +3,17 @@ import portage
import urllib2
import xml.dom.minidom
-from euscan import helpers, output
+from euscan import helpers, output, mangling
HANDLER_NAME = "php"
CONFIDENCE = 100
PRIORITY = 90
-
-def can_handle(pkg, url):
- if url.startswith('http://pear.php.net/get/'):
- return True
- if url.startswith('http://pecl.php.net/get/'):
- return True
+def can_handle(pkg, url=None):
return False
-
def guess_package_and_channel(cp, url):
- match = re.search('http://(.*)/get/(.*)-(.*).tgz', url)
+ match = re.search('http://(.*)\.php\.net/get/(.*)-(.*).tgz', url)
if match:
host = match.group(1)
@@ -30,12 +24,17 @@ def guess_package_and_channel(cp, url):
return pkg, host
-def scan(pkg, url):
- cp, ver, rev = portage.pkgsplit(pkg.cpv)
- package, channel = guess_package_and_channel(cp, url)
+def scan_url(pkg, url, options):
+ package, channel = guess_package_and_channel(pkg.cp, url)
+ return scan_pkg(pkg, {'type' : channel, 'data' : package })
+
+def scan_pkg(pkg, options):
+ cp, ver, rev = pkg.cp, pkg.version, pkg.revision
- orig_url = url
- url = 'http://%s/rest/r/%s/allreleases.xml' % (channel, package.lower())
+ package = options['data']
+ channel = options['type']
+
+ url = 'http://%s.php.net/rest/r/%s/allreleases.xml' % (channel, package.lower())
output.einfo("Using: " + url)
@@ -58,14 +57,12 @@ def scan(pkg, url):
for node in nodes:
up_pv = node.childNodes[0].data
- pv = helpers.gentoo_mangle_version(up_pv)
+ pv = mangling.mangle_version(up_pv, options)
if helpers.version_filtered(cp, ver, pv):
continue
- url = 'http://%s/get/%s-%s.tgz' % (channel, package, up_pv)
-
- if url == orig_url:
- continue
+ url = 'http://%s.php.net/get/%s-%s.tgz' % (channel, package, up_pv)
+ url = mangling.mangle_url(url, options)
ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
diff --git a/pym/euscan/handlers/url/pypi.py b/pym/euscan/handlers/pypi.py
index 02428ee..c49046c 100644
--- a/pym/euscan/handlers/url/pypi.py
+++ b/pym/euscan/handlers/pypi.py
@@ -3,15 +3,15 @@ import re
import portage
-from euscan import helpers, output
+from euscan import mangling, helpers, output
HANDLER_NAME = "pypi"
CONFIDENCE = 100
PRIORITY = 90
-def can_handle(pkg, url):
- return url.startswith('mirror://pypi/')
+def can_handle(pkg, url=None):
+ return url and url.startswith('mirror://pypi/')
def guess_package(cp, url):
@@ -24,19 +24,15 @@ def guess_package(cp, url):
return pkg
-def scan(pkg, url):
+def scan_url(pkg, url, options):
'http://wiki.python.org/moin/PyPiXmlRpc'
package = guess_package(pkg.cpv, url)
+    return scan_pkg(pkg, {'data' : package})
- ret = []
- for urls, pv in scan_remote(pkg, [package]):
- ret.append((urls, pv, HANDLER_NAME, CONFIDENCE))
- return ret
-
-def scan_remote(pkg, remote_data):
- package = remote_data[0]
+def scan_pkg(pkg, options):
+ package = options['data']
output.einfo("Using PyPi XMLRPC: " + package)
@@ -52,10 +48,10 @@ def scan_remote(pkg, remote_data):
ret = []
for up_pv in versions:
- pv = helpers.gentoo_mangle_version(up_pv)
+ pv = mangling.mangle_version(up_pv, options)
if helpers.version_filtered(cp, ver, pv):
continue
urls = client.release_urls(package, up_pv)
- urls = " ".join([infos['url'] for infos in urls])
- ret.append((urls, pv))
+ urls = " ".join([mangling.mangle_url(infos['url'], options) for infos in urls])
+ ret.append((urls, pv, HANDLER_NAME, CONFIDENCE))
return ret
diff --git a/pym/euscan/handlers/url/rubygems.py b/pym/euscan/handlers/rubygems.py
index 3b4facd..7fd4c02 100644
--- a/pym/euscan/handlers/url/rubygems.py
+++ b/pym/euscan/handlers/rubygems.py
@@ -3,15 +3,15 @@ import portage
import json
import urllib2
-from euscan import helpers, output
+from euscan import helpers, output, mangling
HANDLER_NAME = "rubygems"
CONFIDENCE = 100
PRIORITY = 90
-def can_handle(pkg, url):
- return url.startswith('mirror://rubygems/')
+def can_handle(pkg, url=None):
+ return url and url.startswith('mirror://rubygems/')
def guess_gem(cpv, url):
@@ -29,7 +29,7 @@ def guess_gem(cpv, url):
return pkg
-def scan(pkg, url):
+def scan_url(pkg, url, options):
'http://guides.rubygems.org/rubygems-org-api/#gemversion'
gem = guess_gem(pkg.cpv, url)
@@ -41,14 +41,11 @@ def scan(pkg, url):
output.einfo("Using RubyGem API: %s" % gem)
- ret = []
- for url, pv in scan_remote(pkg, [gem]):
- ret.append(url, pv, HANDLER_NAME, CONFIDENCE)
- return ret
+ return scan_pkg(pkg, {'data' : gem})
-def scan_remote(pkg, remote_data):
- gem = remote_data[0]
+def scan_pkg(pkg, options):
+ gem = options['data']
url = 'http://rubygems.org/api/v1/versions/%s.json' % gem
try:
@@ -69,9 +66,10 @@ def scan_remote(pkg, remote_data):
ret = []
for version in versions:
up_pv = version['number']
- pv = helpers.gentoo_mangle_version(up_pv)
+ pv = mangling.mangle_version(up_pv, options)
if helpers.version_filtered(cp, ver, pv):
continue
url = 'http://rubygems.org/gems/%s-%s.gem' % (gem, up_pv)
- ret.append((url, pv))
+ url = mangling.mangle_url(url, options)
+ ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
return ret
diff --git a/pym/euscan/handlers/url.py b/pym/euscan/handlers/url.py
new file mode 100644
index 0000000..b5b22da
--- /dev/null
+++ b/pym/euscan/handlers/url.py
@@ -0,0 +1,98 @@
+import re
+import urllib2
+
+import portage
+
+import generic
+from euscan import output, helpers
+
+PRIORITY = 100
+
+HANDLER_NAME = "url"
+CONFIDENCE = 100.0
+
+
+is_pattern = r"\([^\/]+\)"
+
+def can_handle(*args):
+ return False
+
+def handle_directory_patterns(base, file_pattern):
+ """
+ Directory pattern matching
+ e.g.: base: ftp://ftp.nessus.org/pub/nessus/nessus-([\d\.]+)/src/
+ file_pattern: nessus-core-([\d\.]+)\.tar\.gz
+ """
+ splitted = base.split("/")
+ i = 0
+ basedir = []
+ for elem in splitted:
+ if re.search(is_pattern, elem):
+ break
+ basedir.append(elem)
+ i += 1
+ basedir = "/".join(basedir)
+ directory_pattern = splitted[i]
+ final = "/".join(splitted[i + 1:])
+
+ try:
+ fp = helpers.urlopen(basedir)
+ except urllib2.URLError:
+ return []
+ except IOError:
+ return []
+
+ if not fp:
+ return []
+
+ data = fp.read()
+
+ if basedir.startswith("ftp://"):
+ scan_data = generic.scan_ftp(data, basedir, directory_pattern)
+ else:
+ scan_data = generic.scan_html(data, basedir, directory_pattern)
+
+ return [("/".join((basedir, path, final)), file_pattern)
+ for _, path in scan_data]
+
+def read_options(options):
+ try:
+ base, file_pattern = options['data'].split(" ")[:2]
+ except ValueError:
+ base, file_pattern = options['data'], None
+
+ # the file pattern can be in the base url
+ pattern_regex = r"/([^/]*\([^/]*\)[^/]*)$"
+ match = re.search(pattern_regex, base)
+ if match:
+ file_pattern = match.group(1)
+ base = base.replace(file_pattern, "")
+
+ # handle sf.net specially
+ base = base.replace(
+ "http://sf.net/", "http://qa.debian.org/watch/sf.php/"
+ )
+
+ return base, file_pattern
+
+def scan_pkg(pkg, options):
+ output.einfo("Using watch data")
+
+ cp, ver, rev = pkg.cp, pkg.version, pkg.revision
+
+ base, file_pattern = read_options(options)
+
+ results = []
+ if not re.search(is_pattern, base):
+ steps = [(base, file_pattern)]
+ results = generic.scan_directory_recursive(
+ cp, ver, rev, "", steps, base, options
+ )
+ else:
+ for step in handle_directory_patterns(base, file_pattern):
+ results += generic.scan_directory_recursive(
+ cp, ver, rev, "", [step], base, options
+ )
+
+ return results
+
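
Assuming the functions above are in scope, a watch entry as produced by get_metadata() for this handler is consumed roughly like this (URL and pattern are invented for the example):

    options = {
        'type': 'url',
        'data': 'ftp://ftp.example.org/pub/foo/ foo-([\d.]+)\.tar\.gz',
    }
    base, file_pattern = read_options(options)
    # base == 'ftp://ftp.example.org/pub/foo/'
    # file_pattern == 'foo-([\d.]+)\.tar\.gz'
    # scan_pkg() then walks base with generic.scan_directory_recursive()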
diff --git a/pym/euscan/handlers/url/__init__.py b/pym/euscan/handlers/url/__init__.py
deleted file mode 100644
index 7328644..0000000
--- a/pym/euscan/handlers/url/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-"""
-Url wide handlers for scanning upstream
-"""
-
-import pkgutil
-
-handlers = []
-
-# autoimport all modules in this directory and append them to handlers list
-for loader, module_name, is_pkg in pkgutil.walk_packages(__path__):
- module = loader.find_module(module_name).load_module(module_name)
- handlers.append(module)
-
-# sort handlers by priority
-handlers = sorted(
- handlers,
- key=lambda handler: handler.PRIORITY,
- reverse=True
-)
diff --git a/pym/euscan/helpers.py b/pym/euscan/helpers.py
index ec721b7..ce5f2fb 100644
--- a/pym/euscan/helpers.py
+++ b/pym/euscan/helpers.py
@@ -34,120 +34,6 @@ _v_end = r'(?:(?:-|_)(?:pre|p|beta|b|alpha|a|rc|r)\d*)'
_v = r'((?:\d+)(?:(?:\.\d+)*)(?:[a-zA-Z]*?)(?:' + _v_end + '*))'
-# Stolen from g-pypi
-def gentoo_mangle_version(up_pv):
- """Convert PV to MY_PV if needed
-
- :param up_pv: Upstream package version
- :type up_pv: string
- :returns: pv
- :rtype: string
-
- Can't determine PV from upstream's version.
- Do our best with some well-known versioning schemes:
-
- * 1.0a1 (1.0_alpha1)
- * 1.0-a1 (1.0_alpha1)
- * 1.0b1 (1.0_beta1)
- * 1.0-b1 (1.0_beta1)
- * 1.0-r1234 (1.0_pre1234)
- * 1.0dev-r1234 (1.0_pre1234)
- * 1.0.dev-r1234 (1.0_pre1234)
- * 1.0dev-20091118 (1.0_pre20091118)
-
- Regex match.groups():
- * pkgfoo-1.0.dev-r1234
- * group 1 pv major (1.0)
- * group 2 replace this with portage suffix (.dev-r)
- * group 3 suffix version (1234)
-
- The order of the regexes is significant. For instance if you have
- .dev-r123, dev-r123 and -r123 you should order your regex's in
- that order.
-
- The chronological portage release versions are:
-
- * _alpha
- * _beta
- * _pre
- * _rc
- * release
- * _p
-
- **Example:**
-
- >>> gentoo_mangle_version('1.0b2')
- '1.0_beta2'
-
- .. note::
- The number of regex's could have been reduced, but we use four
- number of match.groups every time to simplify the code
-
- """
- bad_suffixes = re.compile(
- r'((?:[._-]*)(?:dev|devel|final|stable|snapshot)$)', re.I)
- revision_suffixes = re.compile(
- r'(.*?)([\._-]*(?:r|patch|p)[\._-]*)([0-9]*)$', re.I)
- suf_matches = {
- '_pre': [
- r'(.*?)([\._-]*dev[\._-]*r?)([0-9]+)$',
- r'(.*?)([\._-]*(?:pre|preview)[\._-]*)([0-9]*)$',
- ],
- '_alpha': [
- r'(.*?)([\._-]*(?:alpha|test)[\._-]*)([0-9]*)$',
- r'(.*?)([\._-]*a[\._-]*)([0-9]*)$',
- r'(.*[^a-z])(a)([0-9]*)$',
- ],
- '_beta': [
- r'(.*?)([\._-]*beta[\._-]*)([0-9]*)$',
- r'(.*?)([\._-]*b)([0-9]*)$',
- r'(.*[^a-z])(b)([0-9]*)$',
- ],
- '_rc': [
- r'(.*?)([\._-]*rc[\._-]*)([0-9]*)$',
- r'(.*?)([\._-]*c[\._-]*)([0-9]*)$',
- r'(.*[^a-z])(c[\._-]*)([0-9]+)$',
- ],
- }
- rs_match = None
- pv = up_pv
- additional_version = ""
-
- rev_match = revision_suffixes.search(up_pv)
- if rev_match:
- pv = up_pv = rev_match.group(1)
- replace_me = rev_match.group(2)
- rev = rev_match.group(3)
- additional_version = '_p' + rev
-
- for this_suf in suf_matches.keys():
- if rs_match:
- break
- for regex in suf_matches[this_suf]:
- rsuffix_regex = re.compile(regex, re.I)
- rs_match = rsuffix_regex.match(up_pv)
- if rs_match:
- portage_suffix = this_suf
- break
-
- if rs_match:
- # e.g. 1.0.dev-r1234
- major_ver = rs_match.group(1) # 1.0
- replace_me = rs_match.group(2) # .dev-r
- rev = rs_match.group(3) # 1234
- pv = major_ver + portage_suffix + rev
- else:
- # Single suffixes with no numeric component are simply removed.
- match = bad_suffixes.search(up_pv)
- if match:
- suffix = match.groups()[0]
- pv = up_pv[: - (len(suffix))]
-
- pv = pv + additional_version
-
- return pv
-
-
def cast_int_components(version):
for i, obj in enumerate(version):
try:
@@ -520,7 +406,6 @@ def basedir_from_template(template):
return template[0:idx]
-
def generate_scan_paths(url):
prefix, chunks = url.split('://')
chunks = chunks.split('/')
diff --git a/pym/euscan/mangling.py b/pym/euscan/mangling.py
new file mode 100644
index 0000000..60534c4
--- /dev/null
+++ b/pym/euscan/mangling.py
@@ -0,0 +1,163 @@
+import re
+
+import euscan.handlers
+
+def apply_mangling_rule(mangle, string):
+ # convert regex from perl format to python format
+ # there are some regex in this format: s/pattern/replacement/
+ m = re.match(r"s/(.*[^\\])/(.*)/", mangle)
+ if not m:
+ # or in this format s|pattern|replacement|
+ m = re.match(r"s\|(.*[^\\])\|(.*)\|", mangle)
+ if not m: # Not a known regex format
+ return string
+ pattern, repl = m.groups()
+ repl = re.sub(r"\$(\d+)", r"\\\1", repl)
+
+ return re.sub(pattern, repl, string)
+
+def apply_mangling_rules(kind, rules, string):
+ """
+ Apply multiple mangling rules (both sed-like and handlers)
+ in order
+ """
+
+ if kind not in rules:
+ return string
+
+ for rule in rules[kind]:
+ ret = None
+
+ # First try handlers rules
+        if rule == 'gentoo' and kind == 'versionmangle':
+ ret = gentoo_mangle_version(string)
+ elif kind == 'downloadurlmangle':
+ ret = euscan.handlers.mangle_url(rule, string)
+ elif kind == 'versionmangle':
+ ret = euscan.handlers.mangle_version(rule, string)
+
+ if ret is not None: # Use return value as new string if not None
+ string = ret
+ else: # Apply sed like rules
+ string = apply_mangling_rule(rule, string)
+
+ return string
+
+def mangle_version(up_pv, options):
+ return apply_mangling_rules('versionmangle', options, up_pv)
+
+def mangle_url(url, options):
+ return apply_mangling_rules('downloadurlmangle', options, url)
+
+# Stolen from g-pypi
+def gentoo_mangle_version(up_pv):
+ """Convert PV to MY_PV if needed
+
+ :param up_pv: Upstream package version
+ :type up_pv: string
+ :returns: pv
+ :rtype: string
+
+ Can't determine PV from upstream's version.
+ Do our best with some well-known versioning schemes:
+
+ * 1.0a1 (1.0_alpha1)
+ * 1.0-a1 (1.0_alpha1)
+ * 1.0b1 (1.0_beta1)
+ * 1.0-b1 (1.0_beta1)
+ * 1.0-r1234 (1.0_pre1234)
+ * 1.0dev-r1234 (1.0_pre1234)
+ * 1.0.dev-r1234 (1.0_pre1234)
+ * 1.0dev-20091118 (1.0_pre20091118)
+
+ Regex match.groups():
+ * pkgfoo-1.0.dev-r1234
+ * group 1 pv major (1.0)
+ * group 2 replace this with portage suffix (.dev-r)
+ * group 3 suffix version (1234)
+
+ The order of the regexes is significant. For instance if you have
+ .dev-r123, dev-r123 and -r123 you should order your regex's in
+ that order.
+
+ The chronological portage release versions are:
+
+ * _alpha
+ * _beta
+ * _pre
+ * _rc
+ * release
+ * _p
+
+ **Example:**
+
+ >>> gentoo_mangle_version('1.0b2')
+ '1.0_beta2'
+
+ .. note::
+ The number of regex's could have been reduced, but we use four
+ number of match.groups every time to simplify the code
+
+ """
+ bad_suffixes = re.compile(
+ r'((?:[._-]*)(?:dev|devel|final|stable|snapshot)$)', re.I)
+ revision_suffixes = re.compile(
+ r'(.*?)([\._-]*(?:r|patch|p)[\._-]*)([0-9]*)$', re.I)
+ suf_matches = {
+ '_pre': [
+ r'(.*?)([\._-]*dev[\._-]*r?)([0-9]+)$',
+ r'(.*?)([\._-]*(?:pre|preview)[\._-]*)([0-9]*)$',
+ ],
+ '_alpha': [
+ r'(.*?)([\._-]*(?:alpha|test)[\._-]*)([0-9]*)$',
+ r'(.*?)([\._-]*a[\._-]*)([0-9]*)$',
+ r'(.*[^a-z])(a)([0-9]*)$',
+ ],
+ '_beta': [
+ r'(.*?)([\._-]*beta[\._-]*)([0-9]*)$',
+ r'(.*?)([\._-]*b)([0-9]*)$',
+ r'(.*[^a-z])(b)([0-9]*)$',
+ ],
+ '_rc': [
+ r'(.*?)([\._-]*rc[\._-]*)([0-9]*)$',
+ r'(.*?)([\._-]*c[\._-]*)([0-9]*)$',
+ r'(.*[^a-z])(c[\._-]*)([0-9]+)$',
+ ],
+ }
+ rs_match = None
+ pv = up_pv
+ additional_version = ""
+
+ rev_match = revision_suffixes.search(up_pv)
+ if rev_match:
+ pv = up_pv = rev_match.group(1)
+ replace_me = rev_match.group(2)
+ rev = rev_match.group(3)
+ additional_version = '_p' + rev
+
+ for this_suf in suf_matches.keys():
+ if rs_match:
+ break
+ for regex in suf_matches[this_suf]:
+ rsuffix_regex = re.compile(regex, re.I)
+ rs_match = rsuffix_regex.match(up_pv)
+ if rs_match:
+ portage_suffix = this_suf
+ break
+
+ if rs_match:
+ # e.g. 1.0.dev-r1234
+ major_ver = rs_match.group(1) # 1.0
+ replace_me = rs_match.group(2) # .dev-r
+ rev = rs_match.group(3) # 1234
+ pv = major_ver + portage_suffix + rev
+ else:
+ # Single suffixes with no numeric component are simply removed.
+ match = bad_suffixes.search(up_pv)
+ if match:
+ suffix = match.groups()[0]
+ pv = up_pv[: - (len(suffix))]
+
+ pv = pv + additional_version
+
+ return pv
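
A short usage sketch for the new mangling entry points (rule and version string are invented): 'gentoo' selects the built-in gentoo_mangle_version(), anything of the form s/.../.../ is applied as a sed-style substitution, and other names are looked up as handler mangle functions.

    options = {'versionmangle': ['s/-rc/_rc/', 'gentoo']}
    pv = mangle_version('1.0-rc2', options)  # -> '1.0_rc2'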
diff --git a/pym/euscan/scan.py b/pym/euscan/scan.py
index f5e13cf..a55c6d9 100644
--- a/pym/euscan/scan.py
+++ b/pym/euscan/scan.py
@@ -44,24 +44,6 @@ def filter_versions(cp, versions):
]
-# gentoolkit stores PORTDB, so even if we modify it to add an overlay
-# it will still use the old dbapi
-def reload_gentoolkit():
- from gentoolkit import dbapi
- import gentoolkit.package
- import gentoolkit.query
-
- PORTDB = portage.db[portage.root]["porttree"].dbapi
- dbapi.PORTDB = PORTDB
-
- if hasattr(dbapi, 'PORTDB'):
- dbapi.PORTDB = PORTDB
- if hasattr(gentoolkit.package, 'PORTDB'):
- gentoolkit.package.PORTDB = PORTDB
- if hasattr(gentoolkit.query, 'PORTDB'):
- gentoolkit.query.PORTDB = PORTDB
-
-
def scan_upstream(query, on_progress=None):
"""
Scans the upstream searching new versions for the given query