Spade

Mini Shell

Directory:~$ /home/lmsyaran/public_html/joomla5/libraries/vendor/wamania/php-stemmer/src/Stemmer/
Upload File

[Home] [System Details] [Kill Me]
Current File:~$ /home/lmsyaran/public_html/joomla5/libraries/vendor/wamania/php-stemmer/src/Stemmer/Italian.php

<?php

namespace Wamania\Snowball\Stemmer;

use voku\helper\UTF8;

/**
 * Italian stemmer following the Snowball algorithm description.
 *
 * The word is normalised in stem() and then passed through the successive
 * steps (0, 1, 2, 3a, 3b) described at the link below. The region helpers
 * (rv(), r1(), r2(), inRv(), inR1(), inR2()) and the suffix helpers
 * (search(), searchIfInRv(), searchIfInR2()) are not defined in this class;
 * presumably they come from the parent Stem class.
 *
 * @link http://snowball.tartarus.org/algorithms/italian/stemmer.html
 * @author wamania
 */
class Italian extends Stem
{
    /**
     * All Italian vowels
     */
    protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'à', 'è', 'ì', 'ò', 'ù');

    /**
     * {@inheritdoc}
     *
     * @param string $word UTF-8 encoded word to stem
     *
     * @return string the stemmed, lower-cased word
     *
     * @throws \Exception when $word is not valid UTF-8
     */
    public function stem($word)
    {
        // we do ALL in UTF-8
        if (!UTF8::is_utf8($word)) {
            throw new \Exception('Word must be in UTF-8');
        }

        $this->plainVowels = implode('', self::$vowels);

        $this->word = UTF8::strtolower($word);

        // First, replace all acute accents by grave accents.
        $this->word = UTF8::str_replace(
            array('á', 'é', 'í', 'ó', 'ú'),
            array('à', 'è', 'ì', 'ò', 'ù'),
            $this->word
        );

        // And, as in French, put u after q, and u, i between vowels into upper case
        // (see the note on vowel marking in the algorithm description).
        // finish() turns these markers back into lower case.
        $vowelClass = '[' . $this->plainVowels . ']';
        $this->word = preg_replace('#([q])u#u', '$1U', $this->word);
        $this->word = preg_replace('#(' . $vowelClass . ')u(' . $vowelClass . ')#u', '$1U$2', $this->word);
        $this->word = preg_replace('#(' . $vowelClass . ')i(' . $vowelClass . ')#u', '$1I$2', $this->word);

        $this->rv();
        $this->r1();
        $this->r2();

        $this->step0();

        $beforeStep1 = $this->word;
        $this->step1();

        // Do step 2 if no ending was removed by step 1.
        // Strict comparison: two different numeric-looking strings must not compare equal.
        if ($beforeStep1 === $this->word) {
            $this->step2();
        }

        $this->step3a();
        $this->step3b();
        $this->finish();

        return $this->word;
    }

    /**
     * Step 0: Attached pronoun
     *
     * Looks for one of the attached pronoun suffixes at the end of the word.
     * When it follows "ando"/"endo" (in RV) the pronoun is deleted; when it
     * follows "ar"/"er"/"ir" (in RV) the pronoun is replaced by "e".
     *
     * @return bool true when a pronoun suffix was found (whether or not the word was changed)
     */
    private function step0()
    {
        // Search for the longest among the following suffixes
        $position = $this->search(array(
            'gliela', 'gliele', 'glieli', 'glielo', 'gliene',
            'sene', 'mela', 'mele', 'meli', 'melo', 'mene', 'tela', 'tele', 'teli', 'telo', 'tene', 'cela',
            'cele', 'celi', 'celo', 'cene', 'vela', 'vele', 'veli', 'velo', 'vene',
            'gli', 'la', 'le', 'li', 'lo', 'mi', 'ne', 'si', 'ti', 'vi', 'ci'
        ));

        if ($position === false) {
            return false;
        }

        $suffixe = UTF8::substr($this->word, $position);

        // following one of (in RV)
        // (a) ando, endo: the suffix is deleted
        $a = array('ando' . $suffixe, 'endo' . $suffixe);
        if ($this->searchIfInRv($a) !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);
        }

        // (b) ar, er, ir: the suffix is replaced by e
        $b = array('ar' . $suffixe, 'er' . $suffixe, 'ir' . $suffixe);
        if ($this->searchIfInRv($b) !== false) {
            // $suffixe is taken from the input word, so it is escaped before
            // being embedded in the pattern.
            $this->word = preg_replace('#(' . preg_quote($suffixe, '#') . ')$#u', 'e', $this->word);
        }

        return true;
    }

    /**
     * Step 1: Standard suffix removal
     *
     * Tries each suffix group in turn; the first group whose suffix ends the
     * word is handled and the method returns, even when the region test
     * fails and nothing is removed.
     *
     * @return bool true when one of the step 1 suffixes ends the word, false otherwise
     */
    private function step1()
    {
        // amente
        //      delete if in R1
        //      if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
        //      if preceded by os, ic or abil, delete if in R2
        if (($position = $this->search(array('amente'))) !== false) {
            if ($this->inR1($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
            if (($position2 = $this->searchIfInR2(array('iv'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position2);
                if (($position3 = $this->searchIfInR2(array('at'))) !== false) {
                    $this->word = UTF8::substr($this->word, 0, $position3);
                }

                // if preceded by os, ic or abil, delete if in R2
            } elseif (($position4 = $this->searchIfInR2(array('os', 'ic', 'abil'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position4);
            }

            return true;
        }

        // delete if in R2
        $position = $this->search(array(
            'ibili', 'atrice', 'abili', 'abile', 'ibile', 'atrici', 'mente',
            'anza', 'anze', 'iche', 'ichi', 'ismo', 'ismi', 'ista', 'iste', 'isti', 'istà', 'istè', 'istì',
            'ante', 'anti',
            'ico', 'ici', 'ica', 'ice', 'oso', 'osi', 'osa', 'ose'
        ));
        if ($position !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            return true;
        }

        // azione   azioni   atore   atori
        //      delete if in R2
        //      if preceded by ic, delete if in R2
        if (($position = $this->search(array('azione', 'azioni', 'atore', 'atori'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);

                $position2 = $this->search(array('ic'));
                if ($position2 !== false && $this->inR2($position2)) {
                    $this->word = UTF8::substr($this->word, 0, $position2);
                }
            }

            return true;
        }

        // logia   logie
        //      replace with log if in R2
        if (($position = $this->search(array('logia', 'logie'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = preg_replace('#(logia|logie)$#u', 'log', $this->word);
            }

            return true;
        }

        // uzione   uzioni   usione   usioni
        //      replace with u if in R2
        if (($position = $this->search(array('uzione', 'uzioni', 'usione', 'usioni'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = preg_replace('#(uzione|uzioni|usione|usioni)$#u', 'u', $this->word);
            }

            return true;
        }

        // enza   enze
        //      replace with ente if in R2
        if (($position = $this->search(array('enza', 'enze'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = preg_replace('#(enza|enze)$#u', 'ente', $this->word);
            }

            return true;
        }

        // amento   amenti   imento   imenti
        //      delete if in RV
        if (($position = $this->search(array('amento', 'amenti', 'imento', 'imenti'))) !== false) {
            if ($this->inRv($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            return true;
        }

        // ità
        //      delete if in R2
        //      if preceded by abil, ic or iv, delete if in R2
        if (($position = $this->search(array('ità'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            // Strict check so that a position of 0 is not mistaken for "not found".
            if (($position2 = $this->searchIfInR2(array('abil', 'ic', 'iv'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position2);
            }

            return true;
        }

        // ivo   ivi   iva   ive
        //      delete if in R2
        //      if preceded by at, delete if in R2 (and if further preceded by ic, delete if in R2)
        if (($position = $this->search(array('ivo', 'ivi', 'iva', 'ive'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            if (($position2 = $this->searchIfInR2(array('at'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position2);
                if (($position3 = $this->searchIfInR2(array('ic'))) !== false) {
                    $this->word = UTF8::substr($this->word, 0, $position3);
                }
            }

            return true;
        }

        return false;
    }

    /**
     * Step 2: Verb suffixes
     * Search for the longest among the following suffixes in RV, and if found, delete.
     *
     * The list is ordered so that a longer suffix is listed before any
     * shorter suffix it ends with; keep that order when editing it.
     */
    private function step2()
    {
        $position = $this->searchIfInRv(array(
            'assimo', 'assero', 'eranno', 'erebbero', 'erebbe', 'eremmo', 'ereste', 'eresti', 'essero',
            'iranno', 'irebbero', 'irebbe', 'iremmo',
            'iscano', 'ireste', 'iresti', 'iscono', 'issero',
            'avamo', 'arono', 'avano', 'avate', 'eremo', 'erete', 'erono',
            'evamo', 'evano', 'evate', 'ivamo', 'ivano', 'ivate', 'iremo', 'irete', 'irono',
            'ammo', 'ando', 'asse', 'assi', 'emmo', 'enda', 'ende', 'endi', 'endo', 'erai', 'erei',
            'Yamo', 'iamo', 'immo', 'irà', 'irai', 'irei',
            'isca', 'isce', 'isci', 'isco',
            'ano', 'are', 'ata', 'ate', 'ati', 'ato', 'ava', 'avi', 'avo', 'erà', 'ere', 'erò', 'ete', 'eva',
            'evi', 'evo', 'ire', 'ita', 'ite', 'iti', 'ito', 'iva', 'ivi', 'ivo', 'ono',
            'uta', 'ute', 'uti', 'uto', 'irò', 'ar', 'ir'
        ));

        if ($position !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);
        }
    }

    /**
     * Step 3a
     * Delete a final a, e, i, o, à, è, ì or ò if it is in RV, and a preceding i if it is in RV
     *
     * @return bool true when a final vowel was removed, false otherwise
     */
    private function step3a()
    {
        if ($this->searchIfInRv(array('a', 'e', 'i', 'o', 'à', 'è', 'ì', 'ò')) === false) {
            return false;
        }

        $this->word = UTF8::substr($this->word, 0, -1);

        // and a preceding i, if it is in RV
        if ($this->searchIfInRv(array('i')) !== false) {
            $this->word = UTF8::substr($this->word, 0, -1);
        }

        return true;
    }

    /**
     * Step 3b
     * Replace final ch (or gh) with c (or g) if in RV (crocch -> crocc)
     */
    private function step3b()
    {
        if ($this->searchIfInRv(array('ch')) !== false) {
            $this->word = preg_replace('#(ch)$#u', 'c', $this->word);
        } elseif ($this->searchIfInRv(array('gh')) !== false) {
            $this->word = preg_replace('#(gh)$#u', 'g', $this->word);
        }
    }

    /**
     * Finally
     * turn I and U back into lower case
     */
    private function finish()
    {
        $this->word = UTF8::str_replace(array('I', 'U'), array('i', 'u'), $this->word);
    }
}