Spade

Mini Shell

Directory:~$ /home/lmsyaran/public_html/joomla5/libraries/vendor/wamania/php-stemmer/src/Stemmer/
Upload File

[Home] [System Details] [Kill Me]
Current File:~$ /home/lmsyaran/public_html/joomla5/libraries/vendor/wamania/php-stemmer/src/Stemmer/Finnish.php

<?php
/**
 * Finnish Snowball Stemmer.
 *
 * @author msaari <mikko@mikkosaari.fi>
 */
namespace Wamania\Snowball\Stemmer;

use voku\helper\UTF8;

/**
 * Finnish Snowball Stemmer.
 *
 * @link http://snowball.tartarus.org/algorithms/finnish/stemmer.html
 * @author msaari
 */
class Finnish extends Stem
{
    /**
     * All Finnish vowels.
     *
     * @var string[]
     */
    protected static $vowels = ['a', 'e', 'i', 'o', 'u', 'y', 'ä', 'ö'];

    /**
     * Consonants, as used by the "cv" rule of step 3 and the "cX" rule of
     * step 6.
     *
     * @var string[]
     */
    protected static $consonants = [
        'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm',
        'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z',
    ];

    /**
     * Restricted vowels: the vowel list above without "y". Used by the
     * "hXn" and "Vi" rules of step 3.
     *
     * @var string[]
     */
    protected static $restrictedVowels = ['a', 'e', 'i', 'o', 'u', 'ä', 'ö'];

    /**
     * Long restricted vowels, ie. doubled vowels.
     *
     * @var string[]
     */
    protected static $longVowels = ['aa', 'ee', 'ii', 'oo', 'uu', 'ää', 'öö'];

    /**
     * Whether step 3 removed an ending from the word currently being
     * stemmed. Read by step 5; reset at the start of every stem() call.
     *
     * @var bool
     */
    private $_removedInStep3 = false;

    /**
     * {@inheritdoc}
     *
     * @throws \Exception When $word is not valid UTF-8.
     */
    public function stem($word)
    {
        // we do ALL in UTF-8
        if (! UTF8::is_utf8($word)) {
            throw new \Exception('Word must be in UTF-8');
        }

        $this->word = UTF8::strtolower($word);

        // The flag is per-word state. Without this reset, a reused stemmer
        // instance would carry the step 3 result of the previous word into
        // step 5 of this one.
        $this->_removedInStep3 = false;

        // R1 and R2 are then defined in the usual way
        $this->r1();
        $this->r2();

        // Do each of steps 1, 2, 3, 4, 5 and 6.
        $this->step1();
        $this->step2();
        $this->step3();
        $this->step4();
        $this->step5();
        $this->step6();

        return $this->word;
    }

    /**
     * Keep only the first $length characters of the current word and
     * recompute R1 and R2 for the shortened word.
     *
     * @param int $length Number of characters to keep; a negative value is
     *                    passed straight to UTF8::substr() and therefore
     *                    counts from the end of the word.
     *
     * @return void
     */
    private function keepPrefix($length)
    {
        $this->word = UTF8::substr($this->word, 0, $length);
        $this->r1();
        $this->r2();
    }

    /**
     * Step 3 removal: truncate the word at $position and remember that an
     * ending was removed, so that step 5 takes the matching branch.
     *
     * @param int $position Number of leading characters to keep.
     *
     * @return void
     */
    private function removeCaseEnding($position)
    {
        $this->keepPrefix($position);
        $this->_removedInStep3 = true;
    }

    /**
     * Step 1: particles etc.
     *
     * Search for the longest among the following suffixes in R1, and
     * perform the action indicated.
     *
     * @return boolean True when one of the suffixes was found in R1 (even
     *                 if its precondition prevented deletion), false
     *                 otherwise.
     */
    private function step1()
    {
        // (a) kin   kaan   kään   ko   kö   han   hän   pa   pä
        //     delete if preceded by n, t or a vowel
        // The list is ordered longest first.
        $particles = ['kaan', 'kään', 'kin', 'han', 'hän', 'ko', 'kö', 'pa', 'pä'];
        if (($position = $this->searchIfInR1($particles)) !== false) {
            $lastLetter = UTF8::substr($this->word, $position - 1, 1);

            if (in_array($lastLetter, array_merge(['t', 'n'], self::$vowels), true)) {
                $this->keepPrefix($position);
            }

            return true;
        }

        // sti
        // delete if in R2
        if (($position = $this->searchIfInR1(['sti'])) !== false) {
            if ($this->inR2($position)) {
                $this->keepPrefix($position);
            }

            return true;
        }

        return false;
    }

    /**
     * Step 2: possessives.
     *
     * Search for the longest among the following suffixes in R1, and
     * perform the action indicated.
     *
     * @return boolean True when a suffix was removed, false otherwise.
     */
    private function step2()
    {
        // si
        // delete if not preceded by k
        if (($position = $this->searchIfInR1(['si'])) !== false) {
            $lastLetter = UTF8::substr($this->word, $position - 1, 1);

            if ($lastLetter !== 'k') {
                $this->keepPrefix($position);
                return true;
            }
        }

        // ni
        // delete
        if (($position = $this->searchIfInR1(['ni'])) !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);
            // if preceded by kse, replace with ksi
            if ($this->search(['kse']) !== false) {
                $this->word = preg_replace('#(kse)$#u', 'ksi', $this->word);
            }
            // R1/R2 are recomputed only after the optional replacement so
            // that they reflect the final text of the word.
            $this->r1();
            $this->r2();
            return true;
        }

        // nsa   nsä   mme   nne
        // delete
        if (($position = $this->searchIfInR1(['nsa', 'nsä', 'mme', 'nne'])) !== false) {
            $this->keepPrefix($position);
            return true;
        }

        // an
        // delete if preceded by one of   ta   ssa   sta   lla   lta   na
        if (($position = $this->searchIfInR1(['an'])) !== false) {
            $word = UTF8::substr($this->word, 0, $position);
            $lastThreeLetters = UTF8::substr($word, -3, 3);
            $lastTwoLetters = UTF8::substr($word, -2, 2);
            if (in_array($lastThreeLetters, ['ssa', 'sta', 'lla', 'lta'], true)
                || in_array($lastTwoLetters, ['na', 'ta'], true)
            ) {
                $this->keepPrefix($position);
                return true;
            }
        }

        // än
        // delete if preceded by one of   tä   ssä   stä   llä   ltä   nä
        if (($position = $this->searchIfInR1(['än'])) !== false) {
            $word = UTF8::substr($this->word, 0, $position);
            $lastThreeLetters = UTF8::substr($word, -3, 3);
            $lastTwoLetters = UTF8::substr($word, -2, 2);
            if (in_array($lastThreeLetters, ['ssä', 'stä', 'llä', 'ltä'], true)
                || in_array($lastTwoLetters, ['nä', 'tä'], true)
            ) {
                $this->keepPrefix($position);
                return true;
            }
        }

        // en
        // delete if preceded by one of   lle   ine
        if (($position = $this->searchIfInR1(['en'])) !== false) {
            // The three letters in front of "en" are only inspected when
            // the word has more than four characters.
            if (UTF8::strlen($this->word) > 4) {
                $lastThreeLetters = UTF8::substr($this->word, -5, 3);
                if (in_array($lastThreeLetters, ['lle', 'ine'], true)) {
                    $this->keepPrefix($position);
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Step 3: cases
     *
     * Search for the longest among the following suffixes in R1, and
     * perform the action indicated. Every removal made here also sets the
     * flag that step 5 reads.
     *
     * @return boolean True when an ending was removed, or when an hXn
     *                 suffix was found (even if it was kept); false
     *                 otherwise.
     */
    private function step3()
    {
        // hXn
        // delete if preceded by X, where X is a V other than u
        // (a/han, e/hen etc)
        foreach (self::$restrictedVowels as $vowel) {
            if ($vowel === 'u') {
                continue;
            }
            if (($position = $this->searchIfInR1(['h' . $vowel . 'n'])) !== false) {
                $lastLetter = UTF8::substr($this->word, $position - 1, 1);
                if ($lastLetter === $vowel) {
                    $this->removeCaseEnding($position);
                }
                // An hXn match ends step 3 whether or not it was deleted.
                return true;
            }
        }

        // siin   den   tten
        // delete if preceded by Vi
        if (($position = $this->searchIfInR1(['siin', 'den', 'tten'])) !== false) {
            $lastLetter = UTF8::substr($this->word, $position - 1, 1);
            if ($lastLetter === 'i') {
                $nextLastLetter = UTF8::substr($this->word, $position - 2, 1);
                if (in_array($nextLastLetter, self::$restrictedVowels, true)) {
                    $this->removeCaseEnding($position);
                    return true;
                }
            }
        }

        // seen
        // delete if preceded by LV
        if (($position = $this->searchIfInR1(['seen'])) !== false) {
            $lastLetters = UTF8::substr($this->word, $position - 2, 2);

            if (in_array($lastLetters, self::$longVowels, true)) {
                $this->removeCaseEnding($position);
                return true;
            }
        }

        // tta   ttä
        // delete if preceded by e
        if (($position = $this->searchIfInR1(['tta', 'ttä'])) !== false) {
            $lastLetter = UTF8::substr($this->word, $position - 1, 1);

            if ($lastLetter === 'e') {
                $this->removeCaseEnding($position);
                return true;
            }
        }

        // ta  tä  ssa  ssä  sta  stä  lla  llä  lta  ltä  lle  na  nä
        // ksi  ine
        // delete
        // Three-letter endings are listed before the two-letter ones.
        $caseEndings = [
            'ssa', 'ssä', 'sta', 'stä', 'lla', 'llä', 'lta', 'ltä',
            'lle', 'ksi', 'na', 'nä', 'ine', 'ta', 'tä',
        ];
        if (($position = $this->searchIfInR1($caseEndings)) !== false) {
            $this->removeCaseEnding($position);
            return true;
        }

        // a   ä
        // delete if preceded by cv
        if (($position = $this->searchIfInR1(['a', 'ä'])) !== false) {
            $lastLetter = UTF8::substr($this->word, $position - 1, 1);
            $nextLastLetter = UTF8::substr($this->word, $position - 2, 1);

            if (in_array($lastLetter, self::$vowels, true)
                && in_array($nextLastLetter, self::$consonants, true)
            ) {
                $this->removeCaseEnding($position);
                return true;
            }
        }

        // n
        // delete, and if preceded by LV or ie, delete the last vowel
        if (($position = $this->searchIfInR1(['n'])) !== false) {
            $lastLetters = UTF8::substr($this->word, $position - 2, 2);

            if (in_array($lastLetters, self::$longVowels, true) || $lastLetters === 'ie') {
                $this->removeCaseEnding($position - 1);
            } else {
                $this->removeCaseEnding($position);
            }

            return true;
        }

        return false;
    }

    /**
     * Step 4: other endings
     *
     * Search for the longest among the following suffixes in R2, and
     * perform the action indicated
     *
     * @return boolean True when a suffix was removed, false otherwise.
     */
    private function step4()
    {
        // mpi   mpa   mpä   mmi   mma   mmä
        // delete if not preceded by po
        $comparatives = ['mpi', 'mpa', 'mpä', 'mmi', 'mma', 'mmä'];
        if (($position = $this->searchIfInR2($comparatives)) !== false) {
            $lastLetters = UTF8::substr($this->word, $position - 2, 2);
            if ($lastLetters !== 'po') {
                $this->keepPrefix($position);
                return true;
            }
        }

        // impi   impa   impä   immi   imma   immä   eja   ejä
        // delete
        $otherEndings = ['impi', 'impa', 'impä', 'immi', 'imma', 'immä', 'eja', 'ejä'];
        if (($position = $this->searchIfInR2($otherEndings)) !== false) {
            $this->keepPrefix($position);
            return true;
        }

        return false;
    }

    /**
     * Step 5: plurals
     *
     * If an ending was removed in step 3, delete a final i or j if in R1;
     * otherwise, if an ending was not removed in step 3, delete a final t
     * in R1 if it follows a vowel, and, if a t is removed, delete a final
     * mma or imma in R2, unless the mma is preceded by po.
     *
     * @return boolean True when something was removed, false otherwise.
     */
    private function step5()
    {
        if ($this->_removedInStep3) {
            if (($position = $this->searchIfInR1(['i', 'j'])) !== false) {
                $this->keepPrefix($position);
                return true;
            }

            return false;
        }

        if (($position = $this->searchIfInR1(['t'])) === false) {
            return false;
        }

        $lastLetter = UTF8::substr($this->word, $position - 1, 1);
        if (!in_array($lastLetter, self::$vowels, true)) {
            return false;
        }

        // Remove the final t; R2 is recomputed before looking for (i)mma.
        $this->keepPrefix($position);

        if (($position2 = $this->searchIfInR2(['imma'])) !== false) {
            $this->keepPrefix($position2);
        } elseif (($position2 = $this->searchIfInR2(['mma'])) !== false) {
            $lastLetters = UTF8::substr($this->word, $position2 - 2, 2);
            if ($lastLetters !== 'po') {
                $this->keepPrefix($position2);
            }
        }

        return true;
    }

    /**
     * Step 6: tidying up
     *
     * Do in turn steps (a), (b), (c), (d), restricting all tests to the
     * region R1, then step (e) on the whole word.
     *
     * @return void
     */
    private function step6()
    {
        // a) If R1 ends LV
        // delete the last letter
        if (($position = $this->searchIfInR1(self::$longVowels)) !== false) {
            // $position is the first vowel of the pair, so keeping
            // $position + 1 characters drops only the second one.
            $this->keepPrefix($position + 1);
        }

        // b) If R1 ends cX, c a consonant and X one of   a   ä   e   i,
        // delete the last letter
        $lastLetter = UTF8::substr($this->r1, -1, 1);
        $secondToLastLetter = UTF8::substr($this->r1, -2, 1);
        if (in_array($secondToLastLetter, self::$consonants, true)
            && in_array($lastLetter, ['a', 'e', 'i', 'ä'], true)
        ) {
            $this->keepPrefix(-1);
        }

        // c) If R1 ends oj or uj
        // delete the last letter
        $twoLastLetters = UTF8::substr($this->r1, -2, 2);
        if (in_array($twoLastLetters, ['oj', 'uj'], true)) {
            $this->keepPrefix(-1);
        }

        // d) If R1 ends jo
        // delete the last letter
        $twoLastLetters = UTF8::substr($this->r1, -2, 2);
        if ($twoLastLetters === 'jo') {
            $this->keepPrefix(-1);
        }

        // e) If the word ends with a double consonant followed by zero or
        // more vowels, remove the last consonant (so eläkk -> eläk,
        // aatonaatto -> aatonaato)
        $endVowels = '';
        for ($i = UTF8::strlen($this->word) - 1; $i > 0; $i--) {
            $letter = UTF8::substr($this->word, $i, 1);
            if (in_array($letter, self::$vowels, true)) {
                // Collect the trailing vowels so they can be re-attached.
                $endVowels = $letter . $endVowels;
            } else {
                // First non-vowel from the end: check for double consonant
                $prevLetter = UTF8::substr($this->word, $i - 1, 1);
                if ($prevLetter === $letter) {
                    $this->word = UTF8::substr($this->word, 0, $i) . $endVowels;
                }
                break;
            }
        }
    }
}