From 64fb7ac1ff4fffe6c29e8eaa38967acc454a18ae Mon Sep 17 00:00:00 2001 From: Remi Collet Date: Sat, 26 Jul 2014 19:39:57 +0200 Subject: add parser to retrieve packagist package name from binary repository --- class/Parser.php | 183 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 142 insertions(+), 41 deletions(-) (limited to 'class/Parser.php') diff --git a/class/Parser.php b/class/Parser.php index 98fd997..fff7fb0 100644 --- a/class/Parser.php +++ b/class/Parser.php @@ -165,18 +165,15 @@ class Parser } /** - * Parse the content of all RPM repository + * Get the metadata of a RPM repository * - * @param TableRpm $rpmtable the table to write to * @param TableRpmRepo $rpmrepo the table to store timestamp * @param hastable $row the repo to read from * - * @return integer number of parsed line + * @return simplexml */ - static public function readOneRpm(TableRpm $rpmtable, TableRpmRepo $rpmrepo, Array $row) + static private function readMetadata(TableRpmRepo $rpmrepo, Array $row) { - $tot = 0; - self::log("REPOSITORY " . $row['main'] . " " . $row['sub']); $TimRemote = 0; $repomd = @simplexml_load_file($row['url'] . "repodata/repomd.xml"); @@ -194,7 +191,6 @@ class Parser } else if ($TimRemote > $row['stamp']) { self::log("Loading $UrlRemote"); - //$fic=gzopen("primary.xml.gz", "r"); $fic=gzopen($UrlRemote, "r"); if ($fic) { $txt=""; @@ -205,49 +201,67 @@ class Parser gzclose($fic); $primary = simplexml_load_string($txt); - self::log("Read " . $primary->attributes() . " packages"); - unset($txt); + if ($primary) { + self::log("Read " . $primary->attributes() . " packages"); + $rpmrepo->update($row['id'], array('stamp' =>$TimRemote)); - $crit = array( - 'repo_main' => $row['main'], - 'repo_sub' => $row['sub'] - ); - $nb = $rpmtable->delete($crit); - self::log("Delete $nb packages"); - - foreach ($primary->package as $package) { - if ($package->attributes()=='rpm') { - $ver = $package->version->attributes(); - $loc = $package->location->attributes(); - - $input = array( - 'repo_main' => $row['main'], - 'repo_sub' => $row['sub'], - 'name' => $package->name, - 'epoch' => $ver['epoch'], - 'ver' => $ver['ver'], - 'rel' => $ver['rel'], - 'summary' => $package->summary, - 'location' => $loc['href'], - 'url' => $package->url - ); - if ($rpmtable->add($input)) { - $tot++; - } - } + return $primary; + } else { + self::log("ERROR : can't parse $UrlRemote"); } - self::log("Write $tot packages"); - - $rpmrepo->update($row['id'], array('stamp' =>$TimRemote)); - - unset($primary); } else { self::log("ERROR : can't read $UrlRemote"); } } else { self::log("no update needed : $TimRemote / " . $row['stamp']); } + return NULL; + } + /** + * Parse the content of all RPM repository + * + * @param TableRpm $rpmtable the table to write to + * @param TableRpmRepo $rpmrepo the table to store timestamp + * @param hastable $row the repo to read from + * + * @return integer number of parsed line + */ + static private function readOneRpm(TableRpm $rpmtable, TableRpmRepo $rpmrepo, Array $row) + { + $tot = 0; + + $primary = self::readMetadata($rpmrepo, $row); + if ($primary) { + $crit = array( + 'repo_main' => $row['main'], + 'repo_sub' => $row['sub'] + ); + $nb = $rpmtable->delete($crit); + self::log("Delete $nb packages"); + foreach ($primary->package as $package) { + if ($package->attributes()=='rpm') { + $ver = $package->version->attributes(); + $loc = $package->location->attributes(); + + $input = array( + 'repo_main' => $row['main'], + 'repo_sub' => $row['sub'], + 'name' => $package->name, + 'epoch' => $ver['epoch'], + 'ver' => $ver['ver'], + 'rel' => $ver['rel'], + 'summary' => $package->summary, + 'location' => $loc['href'], + 'url' => $package->url + ); + if ($rpmtable->add($input)) { + $tot++; + } + } + } + self::log("Write $tot packages"); + } return $tot; } @@ -270,6 +284,93 @@ class Parser return $tot; } + /** + * Parse the content of one RPM repository for Provides + * + * @param TableRpmRepo $rpmrepo the table to store timestamp + * @param hastable $row the repo to read from + * @param hastable $result found packagist packages + * + * @return integer number of parsed line + */ + static private function readOneProv(TableRpmRepo $rpmrepo, Array $row, Array &$result) + { + $tot = 0; + + $primary = self::readMetadata($rpmrepo, $row); + if ($primary) { + $crit = array( + 'repo_main' => $row['main'], + 'repo_sub' => $row['sub'] + ); + + foreach ($primary->package as $package) { + if ($package->attributes()=='rpm') { + $srpm = false; + $composer = false; + foreach ($package->format->children('rpm', true) as $fmt) { + // get the source rpm name + if ($fmt->getName() == 'sourcerpm') { + if (preg_match('/^(.*)-([^-]*)-([^-]*)\.src\.rpm$/', $fmt, $reg)) { + $srpm = $reg[1]; + } + } + if ($fmt->getName() != 'provides') { + continue; + } + // Parse the provides + foreach ($fmt as $fmt2) { + $prov = $fmt2->attributes()['name']; + if (preg_match('/^php-composer\((.*)\)$/', $prov, $reg)) { + $composer = $reg[1]; + } + } + } + // Save composer name only for main package + if ($composer && $srpm && ($package->name == $srpm)) { + $result[$srpm] = $composer; + } + } + } + self::log("Read $tot packages"); + } + return $tot; + } + + /** + * Parse the content of all RPM repository for Provides + * + * @param TableRpm $rpmtable the table to write to + * @param TableRpmRepo $rpmrepo the table to read from + * @param Array $crit array for repo selection + * + * @return integer number of parsed line + */ + static public function readProvides(TablePackagist $pkgtable, TableRpmRepo $rpmrepo, Array $crit) + { + $tot = 0; + $result = array(); + + foreach ($rpmrepo->request($crit) as $row) { + self::readOneProv($rpmrepo, $row, $result); + } + if (count($result)) { + $pkgtable->truncate(); + foreach($result as $rpm => $pkg) { + $input = array( + 'rpmname' => $rpm, + 'pkgname' => $pkg, + ); + if ($pkgtable->add($input)) { + $tot++; + } + } + } + self::log("Write $tot packagist packages"); + + return $tot; + } + /** * Parse the PECL webservices * -- cgit