Spade
Mini Shell
| Directory:~$ /home/lmsyaran/public_html/joomla5/libraries/vendor/wamania/php-stemmer/src/Stemmer/ |
| [Home] [System Details] [Kill Me] |
<?php
namespace Wamania\Snowball\Stemmer;
use voku\helper\UTF8;
/**
*
* @link http://snowball.tartarus.org/algorithms/spanish/stemmer.html
* @author wamania
*
*/
class Spanish extends Stem
{
    /**
     * All Spanish vowels, including the accented forms and "ü".
     */
    protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'á', 'é', 'í', 'ó', 'ú', 'ü');

    /**
     * {@inheritdoc}
     *
     * Lower-cases the word, computes the RV, R1 and R2 regions, then runs
     * step 0, step 1, (step 2a, step 2b), step 3 and the final accent removal.
     *
     * @param string $word Word to stem; must be valid UTF-8.
     *
     * @return string The stemmed word.
     *
     * @throws \Exception If $word is not valid UTF-8.
     */
    public function stem($word)
    {
        // we do ALL in UTF-8
        if (!UTF8::is_utf8($word)) {
            throw new \Exception('Word must be in UTF-8');
        }

        $this->word = UTF8::strtolower($word);

        $this->rv();
        $this->r1();
        $this->r2();

        $this->step0();

        // Snapshot taken after step 0 so we can tell whether step 1 / 2a changed anything.
        // Strict comparison: loose == would compare numeric-looking strings numerically.
        $beforeStep1 = $this->word;
        $this->step1();

        // Do step 2a if no ending was removed by step 1.
        if ($this->word === $beforeStep1) {
            $this->step2a();

            // Do step 2b if step 2a was done, but failed to remove a suffix.
            if ($this->word === $beforeStep1) {
                $this->step2b();
            }
        }

        $this->step3();
        $this->finish();

        return $this->word;
    }

    /**
     * Step 0: Attached pronoun
     *
     * Search for the longest among the following suffixes
     *      me se sela selo selas selos la le lo las les los nos
     *
     * and delete it, if comes after one of
     *      (a) iéndo ándo ár ér ír
     *      (b) ando iendo ar er ir
     *      (c) yendo following u
     *
     * in RV. In the case of (c), yendo must lie in RV, but the preceding u can be outside it.
     * In the case of (a), deletion is followed by removing the acute accent
     * (for example, haciéndola -> haciendo).
     *
     * @return bool True if a pronoun was removed, false otherwise.
     */
    private function step0()
    {
        $position = $this->searchIfInRv(array(
            'selas', 'selos', 'las', 'los', 'les', 'nos', 'selo', 'sela', 'me', 'se', 'la', 'le', 'lo',
        ));

        if ($position === false) {
            return false;
        }

        // The pronoun that was found; it is appended to each verb ending below
        // so that "ending + pronoun" can be searched as one suffix.
        $suffixe = UTF8::substr($this->word, $position);
        $appendPronoun = function ($item) use ($suffixe) {
            return $item . $suffixe;
        };

        // (a) accented endings: drop the pronoun and the acute accent
        $a = array_map($appendPronoun, array('iéndo', 'ándo', 'ár', 'ér', 'ír'));

        if (($position2 = $this->searchIfInRv($a)) !== false) {
            $suffixe2 = UTF8::substr($this->word, $position2);
            $suffixe2 = UTF8::to_utf8(UTF8::to_ascii($suffixe2)); // unaccent
            $this->word = UTF8::substr($this->word, 0, $position2);
            $this->word .= $suffixe2;
            // $position is reused after unaccenting, i.e. the unaccented text is
            // taken to have the same character count as the accented one.
            $this->word = UTF8::substr($this->word, 0, $position);

            return true;
        }

        // (b) unaccented endings: just drop the pronoun
        $b = array_map($appendPronoun, array('iendo', 'ando', 'ar', 'er', 'ir'));

        if ($this->searchIfInRv($b) !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);

            return true;
        }

        // (c) "yendo" + pronoun, only when the character before "yendo" is "u"
        $position2 = $this->searchIfInRv(array('yendo' . $suffixe));

        if ($position2 !== false
            && $position2 > 0
            && UTF8::substr($this->word, $position2 - 1, 1) === 'u'
        ) {
            $this->word = UTF8::substr($this->word, 0, $position);

            return true;
        }

        return false;
    }

    /**
     * Step 1: Standard suffix removal
     *
     * The suffix groups are tried in the order written below; the first group
     * with a matching suffix decides the outcome. Follow-up removals ("if preceded
     * by ...") are only attempted once the main suffix has actually been removed:
     * while the main suffix is still attached the word cannot end in the
     * follow-up suffix anyway.
     *
     * @return bool True if one of the step 1 suffixes ends the word (whether or
     *              not it was removed), false otherwise.
     */
    private function step1()
    {
        // anza anzas ico ica icos icas ismo ismos able ables ible ibles ista
        // istas oso osa osos osas amiento amientos imiento imientos
        //      delete if in R2
        $position = $this->search(array(
            'imientos', 'imiento', 'amientos', 'amiento', 'osas', 'osos', 'osa', 'oso',
            'istas', 'ista', 'ibles', 'ible', 'ables', 'able', 'ismos', 'ismo',
            'icas', 'icos', 'ica', 'ico', 'anzas', 'anza',
        ));

        if ($position !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            return true;
        }

        // adora ador ación adoras adores aciones ante antes ancia ancias
        //      delete if in R2
        //      if preceded by ic, delete if in R2
        $position = $this->search(array(
            'adoras', 'adora', 'aciones', 'ación', 'adores', 'ador', 'antes', 'ante', 'ancias', 'ancia',
        ));

        if ($position !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);

                if (($position2 = $this->searchIfInR2(array('ic'))) !== false) {
                    $this->word = UTF8::substr($this->word, 0, $position2);
                }
            }

            return true;
        }

        // logía logías
        //      replace with log if in R2
        if (($position = $this->search(array('logías', 'logía'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position) . 'log';
            }

            return true;
        }

        // ución uciones
        //      replace with u if in R2
        if (($position = $this->search(array('uciones', 'ución'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position) . 'u';
            }

            return true;
        }

        // encia encias
        //      replace with ente if in R2
        if (($position = $this->search(array('encias', 'encia'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position) . 'ente';
            }

            return true;
        }

        // amente
        //      delete if in R1
        //      if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
        //      if preceded by os, ic or ad, delete if in R2
        if (($position = $this->search(array('amente'))) !== false) {
            // delete if in R1
            if ($this->inR1($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);

                // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
                if (($position2 = $this->searchIfInR2(array('iv'))) !== false) {
                    $this->word = UTF8::substr($this->word, 0, $position2);

                    if (($position3 = $this->searchIfInR2(array('at'))) !== false) {
                        $this->word = UTF8::substr($this->word, 0, $position3);
                    }
                // if preceded by os, ic or ad, delete if in R2
                } elseif (($position4 = $this->searchIfInR2(array('os', 'ic', 'ad'))) !== false) {
                    $this->word = UTF8::substr($this->word, 0, $position4);
                }
            }

            return true;
        }

        // mente
        //      delete if in R2
        //      if preceded by ante, able or ible, delete if in R2
        if (($position = $this->search(array('mente'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);

                if (($position2 = $this->searchIfInR2(array('ante', 'able', 'ible'))) !== false) {
                    $this->word = UTF8::substr($this->word, 0, $position2);
                }
            }

            return true;
        }

        // idad idades
        //      delete if in R2
        //      if preceded by abil, ic or iv, delete if in R2
        if (($position = $this->search(array('idades', 'idad'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);

                if (($position2 = $this->searchIfInR2(array('abil', 'ic', 'iv'))) !== false) {
                    $this->word = UTF8::substr($this->word, 0, $position2);
                }
            }

            return true;
        }

        // iva ivo ivas ivos
        //      delete if in R2
        //      if preceded by at, delete if in R2
        if (($position = $this->search(array('ivas', 'ivos', 'iva', 'ivo'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);

                if (($position2 = $this->searchIfInR2(array('at'))) !== false) {
                    $this->word = UTF8::substr($this->word, 0, $position2);
                }
            }

            return true;
        }

        return false;
    }

    /**
     * Step 2a: Verb suffixes beginning y
     *
     * Search for one of the listed suffixes in RV; if found, delete it when it
     * is preceded by u. (Note that the preceding u need not be in RV.)
     *
     * @return bool True if a suffix was removed, false otherwise.
     */
    private function step2a()
    {
        $position = $this->searchIfInRv(array(
            'yamos', 'yendo', 'yeron', 'yan', 'yen', 'yais', 'yas', 'yes', 'yo', 'yó', 'ya', 'ye',
        ));

        // Nothing found, or nothing in front of the suffix to inspect.
        if ($position === false || $position < 1) {
            return false;
        }

        if (UTF8::substr($this->word, $position - 1, 1) !== 'u') {
            return false;
        }

        $this->word = UTF8::substr($this->word, 0, $position);

        return true;
    }

    /**
     * Step 2b: Other verb suffixes
     *
     * Search for one of the listed suffixes in RV, and perform the action indicated.
     *
     * @return bool True if a suffix was removed, false otherwise.
     */
    private function step2b()
    {
        // delete
        // NOTE(review): the order of this list is kept exactly as it was; presumably
        // searchIfInRv() takes the first listed suffix that matches, so reordering
        // could change results - confirm against Stem::search() before touching it.
        $position = $this->searchIfInRv(array(
            'iésemos', 'iéramos', 'ábamos', 'iríamos', 'eríamos', 'aríamos', 'áramos', 'ásemos', 'eríais',
            'aremos', 'eremos', 'iremos', 'asteis', 'ieseis', 'ierais', 'isteis', 'aríais',
            'irían', 'aréis', 'erían', 'erías', 'eréis', 'iréis', 'irías', 'ieran', 'iesen',
            'ieron', 'iendo', 'ieras', 'iríais', 'arían', 'arías',
            'amos', 'imos', 'ados', 'idos', 'irán', 'irás', 'erán', 'erás', 'ería', 'iría', 'íais',
            'arán', 'arás', 'aría', 'iera', 'iese', 'aste', 'iste', 'aban', 'aran', 'asen',
            'aron', 'ando', 'abas', 'adas', 'idas', 'ases', 'aras',
            'aré', 'erá', 'eré', 'áis', 'ías', 'irá', 'iré', 'aba', 'ían', 'ada', 'ara',
            'ase', 'ida', 'ado', 'ido', 'ará',
            'ad', 'ed', 'id', 'ís', 'ió', 'ar', 'er', 'ir', 'as', 'ía', 'an',
        ));

        if ($position !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);

            return true;
        }

        // en es éis emos
        //      delete, and if preceded by gu delete the u (the gu need not be in RV)
        $position = $this->searchIfInRv(array('éis', 'emos', 'en', 'es'));

        if ($position !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);

            if (($position2 = $this->search(array('gu'))) !== false) {
                // keep the "g", drop the "u"
                $this->word = UTF8::substr($this->word, 0, $position2 + 1);
            }

            return true;
        }

        return false;
    }

    /**
     * Step 3: residual suffix
     *
     * Search for one of the listed suffixes in RV, and perform the action indicated.
     *
     * @return bool True if a suffix was removed, false otherwise.
     */
    private function step3()
    {
        // os a o á í ó
        //      delete if in RV
        if (($position = $this->searchIfInRv(array('os', 'a', 'o', 'á', 'í', 'ó'))) !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);

            return true;
        }

        // e é
        //      delete if in RV, and if preceded by gu with the u in RV delete the u
        if (($position = $this->searchIfInRv(array('e', 'é'))) !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);

            $position2 = $this->searchIfInRv(array('u'));

            if ($position2 !== false
                && $position2 > 0
                && UTF8::substr($this->word, $position2 - 1, 1) === 'g'
            ) {
                $this->word = UTF8::substr($this->word, 0, $position2);
            }

            // The e/é has been removed whether or not a "gu" preceded it.
            return true;
        }

        return false;
    }

    /**
     * And finally:
     * Remove acute accents
     */
    private function finish()
    {
        $this->word = UTF8::str_replace(
            array('á', 'í', 'ó', 'é', 'ú'),
            array('a', 'i', 'o', 'e', 'u'),
            $this->word
        );
    }
}