<?php

/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */

/**
 * Parsers reading remote sources (pkgdb, R, RPM, PEAR/PECL, Packagist) into the database tables
 *
 * PHP version 5
 *
 * Copyright © 2010-2014 Remi Collet
 *
 * This file is part of rpmphp.
 *
 * rpmphp is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * rpmphp is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with rpmphp.  If not, see <http://www.gnu.org/licenses/>.
 *
 * @category  Main
 * @package   RPMPHP
 *
 * @author    Remi Collet <unknown@unknwown.com>
 * @author    Johan Cwiklinski <johan@x-tnd.be>
 * @copyright 2010-2014 Remi Collet
 * @license   http://www.gnu.org/licenses/agpl-3.0-standalone.html AGPL License 3.0 or (at your option) any later version
 * @link      https://git.remirepo.net/cgit/web/rpmphp.git/
 * @since     The beginning of times.
*/
class Parser
{
    /**
     * Print a message on standard output, prefixed with the current
     * date (RFC 2822 format) and followed by a newline.
     *
     * @param string $msg the text to display
     *
     * @return void
     */
    static public function log($msg)
    {
        $stamp = date("r : ");

        echo "{$stamp}{$msg}\n";
    }

    /**
     * Parse the Bugzilla ACL list from pkgdb
     *
     * The source is read as a '|' separated file. Lines starting with '#'
     * or having fewer than 6 fields are ignored. The first six fields are
     * trimmed and used, in order, as: collection, name, summary, owner,
     * qa, cc. The last three are only stored when not empty.
     *
     * The table is truncated before the import, but only once the source
     * has been opened successfully.
     *
     * @param TableAcls $acls the table to write to
     * @param string    $url  the file to read from
     *
     * @return integer number of records written
     */
    static public function readAcls(TableAcls $acls, $url)
    {
        $tot = 0;

        self::log("Read pkgdb/owner");
        $fic = fopen($url, 'r');
        if (!$fic) {
            self::log("ERROR reading '$url'");
            return $tot;
        }

        $nb = $acls->getCount();
        $acls->truncate();
        self::log("Delete $nb owners");

        // Optional columns: table field name => index in the parsed line
        $optional = array(
            'owner' => 3,
            'qa'    => 4,
            'cc'    => 5,
        );

        while (($line = fgetcsv($fic, 1024, '|')) !== false) {
            // Skip short lines (including blank ones) and comments
            if (count($line) < 6 || substr($line[0], 0, 1) == '#') {
                continue;
            }
            $fields = array_map('trim', array_slice($line, 0, 6));

            $input = array(
                'collection' => $fields[0],
                'name'       => $fields[1],
                'summary'    => $fields[2],
            );
            // Each optional field is read from its own column
            // (qa and cc used to be filled with the owner value)
            foreach ($optional as $key => $idx) {
                if (!empty($fields[$idx])) {
                    $input[$key] = $fields[$idx];
                }
            }
            if ($acls->add($input)) {
                $tot++;
            }
        }
        fclose($fic);
        self::log("wrote $tot package's owner");

        return $tot;
    }

    /**
     * Import the package list of a single R repository
     *
     * The index found at $repo['url'] is searched for "Package:" /
     * "Version:" line pairs. For a "stable" repository the existing
     * records of the channel are deleted first; any other state is
     * recorded as a "devel" version.
     *
     * @param TableUpstream $uptable the table to write to
     * @param array         $repo    the repo to read from (keys: name, state, url)
     *
     * @return integer number of recorded packages
     */
    static private function readOneR(TableUpstream $uptable, Array $repo)
    {
        self::log("Reading " . $repo["name"] . " (" . $repo["state"] . ")");

        $content = @file_get_contents($repo["url"]);
        if (!$content) {
            self::log("Can't read [" . $repo["url"] . "], skip this channel");
            return 0;
        }

        $isStable = ($repo["state"] == "stable");
        if ($isStable) {
            $deleted = $uptable->delete(
                array('type' => 'R', 'channel' => $repo['name'])
            );
            self::log("Delete $deleted packages");
        }

        $matches = array();
        $found = preg_match_all(
            '/Package: *(.*)\nVersion: *(.*)\n/i',
            $content,
            $matches,
            PREG_SET_ORDER
        );
        if (!$found) {
            self::log("No package in this channel");
            return 0;
        }

        $count = 0;
        foreach ($matches as $match) {
            // RPM versions cannot contain a dash
            $version = str_replace('-', '.', $match[2]);
            $recorded = $uptable->record(
                "R",
                $repo["name"],
                "R-" . $match[1],
                $version,
                $isStable,
                ($isStable ? "" : "devel")
            );
            if ($recorded) {
                $count++;
            }
        }
        self::log("Write $count packages in this channel");

        return $count;
    }

    /**
     * Import the package list of every known R repository
     *
     * @param TableUpstream $uptable the table to write to
     * @param TableRRepo    $rrepo   the table listing the repositories
     *
     * @return integer total number of recorded packages
     */
    static public function readR(TableUpstream $uptable, TableRRepo $rrepo)
    {
        $counts = array();
        foreach ($rrepo->request() as $repo) {
            $counts[] = self::readOneR($uptable, $repo);
        }
        $total = array_sum($counts);

        self::log("Write $total packages in all channels");

        return $total;
    }

    /**
     * Get the metadata of a RPM repository
     *
     * Reads repodata/repomd.xml, looks for the "primary" data entry and,
     * when its timestamp is greater than the stored one ($row['stamp']),
     * downloads, uncompresses (.xz, .zst, .gz) and parses the primary file.
     * On success the stored timestamp of the repository is updated.
     *
     * @param TableRpmRepo $rpmrepo the table to store timestamp
     * @param array        $row     the repo to read from
     *                              (keys: id, main, sub, url, stamp)
     *
     * @return SimpleXMLElement|null the parsed primary metadata, NULL when
     *                               no update is needed or on error
     */
    static private function readMetadata(TableRpmRepo $rpmrepo, Array $row)
    {
        self::log("REPOSITORY " . $row['main'] . " " . $row['sub']);

        $TimRemote = 0;
        $UrlRemote = '';
        $repomd = @simplexml_load_file($row['url'] . "repodata/repomd.xml");
        if ($repomd) {
            foreach ($repomd->data as $data) {
                if ($data->attributes()=="primary") {
                    // Plain integer, so that comparison and storage do not
                    // depend on SimpleXMLElement implicit conversions
                    $TimRemote = (int)$data->timestamp;
                    $UrlRemote = $row['url'] . $data->location->attributes();
                }
            }
        }

        if (!$TimRemote) {
            self::log("Can't read " . $row['url']);
            return NULL;
        }
        if (!($TimRemote > $row['stamp'])) {
            self::log("no update needed : $TimRemote / " . $row['stamp']);
            return NULL;
        }

        self::log("Loading $UrlRemote");
        $txt = file_get_contents($UrlRemote);
        if (!$txt) {
            // Stop here, else a misleading "can't uncompress" is also logged
            self::log("ERROR : can't read $UrlRemote");
            return NULL;
        }

        if (str_ends_with($UrlRemote, '.xml')) {
            // Not compressed, nothing to do
        } else if (str_ends_with($UrlRemote, '.xz')) {
            if (!function_exists('xzdecode')) {
                self::log("ERROR : missing xz php extension");
                return NULL;
            }
            $txt = xzdecode($txt);
        } else if (str_ends_with($UrlRemote, '.zst')) {
            if (!function_exists('zstd_uncompress')) {
                self::log("ERROR : missing zstd php extension");
                return NULL;
            }
            $txt = zstd_uncompress($txt);
        } else if (str_ends_with($UrlRemote, '.gz')) {
            $txt = gzdecode($txt);
        } else {
            self::log("ERROR : unkown compression");
            return NULL;
        }
        if (!$txt) {
            self::log("ERROR : can't uncompress $UrlRemote");
            return NULL;
        }

        $primary = simplexml_load_string($txt);
        if (!$primary) {
            self::log("ERROR : can't parse $UrlRemote");
            return NULL;
        }
        self::log("Read " . $primary->attributes() . " packages from primary");
        $rpmrepo->update($row['id'], array('stamp' => $TimRemote));

        return $primary;
    }
    /**
     * Import the package list of a single RPM repository
     *
     * Nothing is done when readMetadata() returns no primary metadata.
     * Else the existing records of the repository are deleted and one
     * record is added per package of type "rpm".
     *
     * @param TableRpm     $rpmtable the table to write to
     * @param TableRpmRepo $rpmrepo  the table to store timestamp
     * @param array        $row      the repo to read from
     *
     * @return integer number of recorded packages
     */
    static private function readOneRpm(TableRpm $rpmtable, TableRpmRepo $rpmrepo, Array $row)
    {
        $primary = self::readMetadata($rpmrepo, $row);
        if (!$primary) {
            return 0;
        }

        $deleted = $rpmtable->delete(
            array(
                'repo_main' => $row['main'],
                'repo_sub'  => $row['sub']
            )
        );
        self::log("Delete $deleted packages");

        $written = 0;
        foreach ($primary->package as $pkg) {
            if ($pkg->attributes() != 'rpm') {
                continue;
            }
            $version  = $pkg->version->attributes();
            $location = $pkg->location->attributes();

            $record = array(
                'repo_main' => $row['main'],
                'repo_sub'  => $row['sub'],
                'name'      => $pkg->name,
                'epoch'     => $version['epoch'],
                'ver'       => $version['ver'],
                'rel'       => $version['rel'],
                'summary'   => $pkg->summary,
                'location'  => $location['href'],
                'url'       => $pkg->url
            );
            if ($rpmtable->add($record)) {
                $written++;
            }
        }
        self::log("Write $written packages");

        return $written;
    }

    /**
     * Import the package list of every selected RPM repository
     *
     * @param TableRpm     $rpmtable the table to write to
     * @param TableRpmRepo $rpmrepo  the table listing the repositories
     * @param array        $crit     criteria for repository selection
     *
     * @return integer total number of recorded packages
     */
    static public function readRpm(TableRpm $rpmtable, TableRpmRepo $rpmrepo, Array $crit)
    {
        $counts = array();
        foreach ($rpmrepo->request($crit) as $repo) {
            $counts[] = self::readOneRpm($rpmtable, $rpmrepo, $repo);
        }

        return array_sum($counts);
    }

    /**
     * Parse the content of one RPM repository for Provides
     *
     * For each package of type "rpm", looks for php-composer(vendor/project)
     * or php-pie(vendor/project) provides. The name found is stored in
     * $result, indexed by the source package name, only when the binary
     * package has the same name as its source package (main package).
     *
     * Names starting with "ext-", ending with "-implementation" or listed
     * in the exclusion list are ignored. A "zendframework/..." name is only
     * used when nothing else was found before.
     *
     * @param TableRpmRepo $rpmrepo the table to store timestamp
     * @param array        $row     the repo to read from
     * @param array        $result  found packagist packages (rpm name => packagist name),
     *                              completed by this method
     *
     * @return integer number of packages stored in $result
     */
    static private function readOneProv(TableRpmRepo $rpmrepo, Array $row, Array &$result)
    {
        $tot = 0;

        // Provided names which must never be used
        $excl = array('udan11/sql-parser');

        $primary = self::readMetadata($rpmrepo, $row);
        if (!$primary) {
            return $tot;
        }

        foreach ($primary->package as $package) {
            if ($package->attributes() != 'rpm') {
                continue;
            }
            $srpm = false;
            $composer = false;
            foreach ($package->format->children('rpm', true) as $fmt) {
                $tag = $fmt->getName();

                // get the source rpm name (name-version-release.src.rpm)
                if ($tag == 'sourcerpm') {
                    if (preg_match('/^(.*)-([^-]*)-([^-]*)\.src\.rpm$/', (string)$fmt, $reg)) {
                        $srpm = $reg[1];
                    }
                    continue;
                }
                if ($tag != 'provides') {
                    continue;
                }

                // Parse the provides
                foreach ($fmt as $fmt2) {
                    $prov = (string)$fmt2->attributes()['name'];
                    if (!preg_match('/^php-(composer|pie)\((.*)\)$/', $prov, $reg)) {
                        continue;
                    }
                    // $reg[1] is the provide type, $reg[2] the package name
                    $name = $reg[2];
                    if (!strpos($name, '/')) {
                        // not a vendor/project name
                        continue;
                    }
                    list($vend) = explode('/', $name, 2);
                    if ($vend == 'zendframework') {
                        if (!$composer) { // only if empty to keep laminas
                            $composer = $name;
                        }
                    } else if (substr($name, 0, 4) != 'ext-'
                        && substr($name, -15) != '-implementation'
                        && !in_array($name, $excl, true)
                    ) {
                        $composer = $name;
                    }
                }
            }
            // Save composer name only for main package
            if ($composer && $srpm && ((string)$package->name === $srpm)) {
                $result[$srpm] = $composer;
                $tot++;
            }
        }
        self::log("Read $tot packages for provides");

        return $tot;
    }

    /**
     * Parse the content of all selected RPM repositories for Provides
     *
     * The packagist table is only truncated and filled again when at
     * least one package was found in the repositories.
     *
     * @param TablePackagist $pkgtable the table to write to
     * @param TableRpmRepo   $rpmrepo  the table listing the repositories
     * @param array          $crit     criteria for repository selection
     *
     * @return integer number of recorded packagist packages
     */
    static public function readProvides(TablePackagist $pkgtable, TableRpmRepo $rpmrepo, Array $crit)
    {
        $found = array();
        foreach ($rpmrepo->request($crit) as $repo) {
            self::readOneProv($rpmrepo, $repo, $found);
        }

        $written = 0;
        if (count($found)) {
            $pkgtable->truncate();
            foreach ($found as $rpmname => $pkgname) {
                try {
                    $added = $pkgtable->add(
                        array(
                            'rpmname' => $rpmname,
                            'pkgname' => $pkgname,
                        )
                    );
                    if ($added) {
                        $written++;
                    }
                } catch (Exception $e) {
                    // Duplicate keys are deliberately ignored for now
                    // ex pimple/pimple provided by php-pimple1 and php-pimple
                }
            }
        }
        self::log("Write $written packagist packages");

        return $written;
    }

    /**
     * Parse the PECL channel
     *
     * PECL is read as a PEAR channel named "pecl", see readOnePear().
     * The former XML-RPC implementation (package.listLatestReleases),
     * which was kept here as unreachable commented-out code, is dropped.
     *
     * @param TableUpstream $uptable the table to write to
     * @param string        $url     the channel URL to read from
     *
     * @return integer number of packages read
     */
    static public function readPecl(TableUpstream $uptable, $url)
    {
        $count = self::readOnePear($uptable, 'pecl', $url);

        return $count;
    }

    /**
     * Search Packagist for the releases of all known packages
     *
     * All 'composer' records are deleted first. Then, for each row of
     * the packagist table, the stable and / or unstable versions returned
     * by the Packagist client are recorded.
     *
     * @param TableUpstream  $uptable the table to write to
     * @param TablePackagist $pktable the table to read from
     *
     * @return integer number of packages written
     */
    static public function readPackagist(TableUpstream $uptable, TablePackagist $pktable)
    {
        self::log("Packagist search releases");
        $client = new PackagistClient();

        $deleted = $uptable->delete(array('type'=>'composer'));
        self::log("Delete $deleted packages");

        $written = 0;
        foreach ($pktable->request(array('ORDER'=>'rpmname')) as $rec) {
            $rep = $client->getPackage($rec['pkgname']);
            if (!$rep) {
                continue;
            }

            // The channel depends on the number of parts of the name
            $parts = explode('/', $rec['pkgname']);
            $count = count($parts);
            if ($count == 3) {
                $vendor = $parts[0] . '/'.$parts[1];
            } else if ($count == 2) {
                $vendor = "packagist/" . $parts[0];
            } else {
                $vendor = "packagist";
            }

            $id = false;
            if ($rep['stable']) {
                $id = $uptable->record(
                    'composer',
                    $vendor,
                    $rec['rpmname'],
                    $rep['stable'],
                    true
                );
            }
            if ($rep['unstable']) {
                $id = $uptable->record(
                    'composer',
                    $vendor,
                    $rec['rpmname'],
                    $rep['unstable'],
                    false,
                    $rep['state']
                );
            }
            // Only the result of the last record() call is considered
            if ($id) {
                $written++;
            }
        }
        self::log("Write $written packages");

        return $written;
    }

    /**
     * Record the releases of one PEAR package under each given RPM name
     *
     * The first release of the list is recorded as the unstable version
     * with its own state (NOTE(review): presumably the list is sorted
     * newest first, confirm against the channel REST output), then the
     * first release whose state is 'stable' is recorded as the stable one.
     *
     * @param TableUpstream    $uptable     the table to write to
     * @param string           $type        the record type ('pear' or 'pecl')
     * @param string           $channelname the channel name
     * @param array            $rpmnames    the RPM names to record
     * @param SimpleXMLElement $releases    the release elements (v = version, s = state)
     *
     * @return boolean false when the list has no release, else true
     */
    static private function recordPearReleases(TableUpstream $uptable, $type, $channelname, Array $rpmnames, $releases)
    {
        if (!isset($releases[0])) {
            return false;
        }
        $first = $releases[0];
        foreach ($rpmnames as $rpmname) {
            $uptable->record(
                $type,
                $channelname,
                $rpmname,
                (string)$first->v,
                false,
                (string)$first->s
            );
        }
        foreach ($releases as $rev) {
            if ($rev->s=='stable') {
                foreach ($rpmnames as $rpmname) {
                    $uptable->record(
                        $type,
                        $channelname,
                        $rpmname,
                        (string)$rev->v,
                        true
                    );
                }
                break;
            }
        }
        return true;
    }

    /**
     * Parse the content of one PEAR channel
     *
     * The REST base URL is read from http://$channelurl/channel.xml.
     * When the channel has no category, the whole package list is read
     * (p/packages.xml) then the releases of each package; else the
     * packagesinfo.xml of each category is read.
     *
     * Each package is recorded under two RPM names: one with '_'
     * replaced by '-' (or a hard-coded special name) and one with the
     * package name unchanged.
     *
     * The existing records of the channel are deleted as soon as the
     * categories have been read, before the packages are read.
     *
     * @param TableUpstream $uptable     the table to write to
     * @param string        $channelname the channel name
     * @param string        $channelurl  the channel URL (host, without scheme)
     *
     * @return integer number of packages read
     */
    static public function readOnePear(TableUpstream $uptable, $channelname, $channelurl)
    {
        $type = ($channelname=='pecl' ? 'pecl' : 'pear');

        $channel = @simplexml_load_file("http://$channelurl/channel.xml");
        if (!$channel) {
            self::log("can't read PEAR site (channel of $channelname)");
            return 0;
        }

        $rest = $channel->servers->primary->rest->baseurl[0];
        self::log("PEAR reading channel=$channelname, baseurl = $rest");

        $categories = @simplexml_load_file($rest."c/categories.xml");
        if (!$categories) {
            self::log("can't read PEAR site (categories)");
            return 0;
        }

        $crit = array('type' => $type, 'channel' => $channelname);
        $nb = $uptable->delete($crit);
        self::log("Delete $nb packages");

        $nb=0;
        if (!isset($categories->c[0])) {
            self::log("Reading ALL"); // ezc only
            $listurl = $rest."p/packages.xml";
            $pitxt = @file_get_contents($listurl);
            if (!$pitxt) {
                self::log("can't read PEAR site (".$listurl.")");
                return 0;
            }
            $allpi = @simplexml_load_string($pitxt);
            if ($allpi === false) {
                self::log("can't read response (".$listurl.")");
                return 0;
            }
            foreach ($allpi->p as $pkg) {
                $name = (string)$pkg;
                $relurl = $rest."r/".strtolower($name)."/allreleases.xml";
                $pitxt = @file_get_contents($relurl);
                if (!$pitxt) {
                    self::log("can't read PEAR site (".$relurl.")");
                    continue;
                }
                $pi = @simplexml_load_string($pitxt);
                if ($pi === false) {
                    self::log("can't read response ($name)");
                    continue;
                }

                $rpmnames = array(
                    "php-".$channelname."-".str_replace("_", "-", $name),
                    "php-".$channelname."-".$name,
                );
                if (self::recordPearReleases($uptable, $type, $channelname, $rpmnames, $pi->r)) {
                    $nb++;
                }
            }

        } else {
            foreach ($categories->c as $cat) {
                self::log("Reading $cat");

                $pitxt = @file_get_contents(
                    $rest."c/".urlencode($cat)."/packagesinfo.xml"
                );
                if (!$pitxt) {
                    self::log(
                        "can't read PEAR site (".$rest."c/".
                        urlencode($cat)."/packagesinfo.xml)"
                    );
                    continue;
                }
                // Clean the response before parsing: the first 2 bytes are
                // replaced by "<?", "<?xml ...>" declarations found after
                // them are removed and line endings are normalized.
                // NOTE(review): presumably a workaround for servers sending
                // several XML declarations in one response - confirm.
                $pitxt = "<?" . preg_replace(
                    "/<\?xml.*?>/U",
                    "",
                    str_replace("\r\n", "\n", substr($pitxt, 2))
                );
                // Drop 0xA0 bytes
                $pitxt = str_replace("\xA0","", $pitxt);

                $pi = @simplexml_load_string($pitxt);
                if (!$pi) {
                    self::log("can't read response ($cat)");
                    continue;
                }
                foreach ($pi->pi as $ps) {
                    if (!isset($ps->p->n) || !isset($ps->a->r)) {
                        continue;
                    }
                    $name=(string)$ps->p->n;

                    // Packages whose RPM name doesn't follow the default rule
                    if ($channelname=='phing' && $name=='phing') {
                        $rpmname1="php-pear-phing";
                    } else if ($channelname=='phpunit' && $name=='PHPUnit') {
                        $rpmname1="php-pear-PHPUnit";
                    } else if ($channelname=='pecl' && $name=='pecl_http') {
                        $rpmname1="php-pecl-http";
                    } else {
                        $rpmname1="php-".$channelname."-".
                            str_replace("_", "-", $name);
                    }
                    $rpmname2="php-".$channelname."-".$name;

                    $rpmnames = array($rpmname1, $rpmname2);
                    if (self::recordPearReleases($uptable, $type, $channelname, $rpmnames, $ps->a->r)) {
                        $nb++;
                    }
                }
            }
        }
        self::log("read $nb packages in $channelname");
        return $nb;

    }
    /**
     * Parse the content of every PEAR channel
     *
     * @param TableUpstream $uptable the table to write to
     * @param TablePearRepo $pear    the table listing the channels
     *
     * @return integer total number of packages read
     */
    static public function readPear(TableUpstream $uptable, TablePearRepo $pear)
    {
        self::log("PEAR reading channels");

        $total = 0;
        foreach ($pear->getAllRepo(true) as $name => $url) {
            $total += self::readOnePear($uptable, $name, $url);
        }
        self::log("Write $total packages in all channels");

        return $total;
    }

}
?>