B_INCR, "sorprendentment"=> B_INCR, "horrible"=> B_INCR, "barbaritat"=> B_INCR, "considerable"=> B_INCR, "considerablement"=> B_INCR, "decidit"=> B_INCR, "profundament"=> B_INCR, "maleit"=> B_INCR, "maleida"=> B_INCR, "enorme"=> B_INCR, "tot"=> B_INCR, "tota"=> B_INCR, "completament"=> B_INCR, "especialment"=> B_INCR, "excepcionalment"=> B_INCR, "extremadament"=> B_INCR, "fabulos"=> B_INCR, "condemnat"=> B_INCR, "molt"=> B_INCR, "fotut"=> B_INCR, "frickin"=> B_INCR, "frigging"=> B_INCR, "friggin"=> B_INCR, "completament"=> B_INCR, "fucking"=> B_INCR, "greatly"=> B_INCR, "hella"=> B_INCR, "highly"=> B_INCR, "enormement"=> B_INCR, "increible"=> B_INCR, "intensament"=> B_INCR, "majoritariament"=> B_INCR, "mes"=> B_INCR, "major"=> B_INCR, "particular"=> B_INCR, "estrictament"=> B_INCR, "simplement"=> B_INCR, "prou"=> B_INCR, "realment"=> B_INCR, "destacable"=> B_INCR, "tan"=> B_INCR, "tant"=> B_INCR, "susbtancia"=> B_INCR, "en esència"=> B_INCR, "thoroughly"=> B_INCR, "totalment"=> B_INCR, "tremendament"=> B_INCR, "uber"=> B_INCR, "increible"=> B_INCR, "inusual"=> B_INCR, "utterly"=> B_INCR, "molt"=> B_INCR, "prou"=> B_INCR, "la major"=> B_DECR, "rarament"=> B_DECR, "difícil"=> B_DECR, "ja n'hi ha prou"=> B_DECR, "tipus de"=> B_DECR, "kinda"=> B_DECR, "prou bò"=> B_DECR, "prou bona"=> B_INCR, "kind-of"=> B_DECR, "menys"=> B_DECR, "menut"=> B_DECR, "marginal"=> B_DECR, "en ocasions"=> B_DECR, "en part"=> B_DECR, "escasament"=> B_DECR, "lleugerament"=> B_DECR, "somewhat"=> B_DECR, "tipus de"=> B_DECR, "sorta"=> B_DECR, "un poc"=> B_DECR, "sort-of"=> B_DECR]; // check for special case idioms using a sentiment-laden keyword known to SAGE const SPECIAL_CASE_IDIOMS = ["quin goig"=> 3, "per menys"=> 3, "si clar"=> 1.5, "burro"=> -2, "burrot" => -2, "ruc" => -2, "anda que"=> 2, "poca solta"=> -1.5, "quin poc trellat"=> -1.5, "anda calla"=> -2, "ni un"=> -2, "la mare que els ha parit"=> -2, "no tenen vergonya"=> -2, "Enhorabona"=>3]; ##Static methods## /* Normalize the score to 
/* Normalize the score to be between -1 and 1 using an alpha that approximates the max expected value.
 *
 * Computes $score / sqrt($score^2 + $alpha). For any positive $alpha the
 * result lies strictly between -1 and 1 and keeps the sign of $score.
 *
 * @param int|float $score raw (unbounded) valence sum
 * @param int|float $alpha smoothing constant; must be positive
 * @return float normalized score
 */
function normalize($score, $alpha=15){
    return $score / sqrt(($score * $score) + $alpha);
}

/*
  Give a sentiment intensity score to sentences.

  Relies on names declared elsewhere in the file/project: the constants
  NEGATE, BOOSTER_DICT, SPECIAL_CASE_IDIOMS, B_DECR, C_INCR and N_SCALAR,
  and the SentiText class (read here through its words_and_emoticons and
  is_cap_diff properties).
*/
class SentimentIntensityAnalyzer{

    /** Absolute path of the lexicon file (resolved relative to this script). */
    private $lexicon_file = "";
    /** Map of lexicon token => float valence, filled by make_lex_dict(). */
    private $lexicon = [];
    /** SentiText wrapper of the text currently being analysed. */
    private $current_sentitext = null;

    /**
     * @param string $lexicon_file lexicon file name, looked up in the directory of this script
     */
    function __construct($lexicon_file="vader_sentiment_lexicon_cat.txt"){
        // Not sure about this as it forces the lexicon file to be in the same directory as this script
        $this->lexicon_file = realpath(dirname(__FILE__)) . "/" . $lexicon_file;
        $this->lexicon = $this->make_lex_dict();
    }

    /**
     * Determine whether a single word is a negation.
     *
     * @param string $wordToTest word to check
     * @param bool   $include_nt also treat any word containing "n't" as a negation
     * @return bool
     */
    function IsNegated($wordToTest, $include_nt=true){
        if(in_array($wordToTest, NEGATE, true)){
            return true;
        }
        // strpos() returns 0 for a match at the very start, so compare against false explicitly.
        if($include_nt && strpos($wordToTest, "n't") !== false){
            return true;
        }
        return false;
    }

    /**
     * Convert the lexicon file to a dictionary.
     *
     * Each line is expected to hold at least "token<TAB>valence"; any further
     * tab-separated columns are ignored and lines without a tab are skipped.
     *
     * @return array map of token => float valence
     */
    function make_lex_dict(){
        $lex_dict = [];
        $fp = fopen($this->lexicon_file, "r");
        if(!$fp){
            die("Cannot load lexicon file");
        }

        while(($line = fgets($fp, 4096)) !== false){
            $fields = explode("\t", trim($line));
            if(count($fields) < 2){
                // blank or malformed line: nothing usable to store
                continue;
            }
            $lex_dict[$fields[0]] = (float)$fields[1];
        }
        fclose($fp);

        return $lex_dict;
    }

    /** True when the two consecutive words form the dampener bi-gram "tipus de". */
    private function IsKindOf($firstWord,$secondWord){
        return "tipus" === strtolower($firstWord) && "de" === strtolower($secondWord);
    }

    /** True when the lower-cased word (or n-gram) is a key of BOOSTER_DICT. */
    private function IsBoosterWord($word){
        return array_key_exists(strtolower($word), BOOSTER_DICT);
    }

    /** Scalar stored in BOOSTER_DICT for the word; only call after IsBoosterWord(). */
    private function getBoosterScaler($word){
        return BOOSTER_DICT[strtolower($word)];
    }

    /** True when the lower-cased word is a key of the loaded lexicon. */
    private function IsInLexicon($word){
        return array_key_exists(strtolower($word), $this->lexicon);
    }

    /** True when every character of the word is an upper-case letter (ctype_upper). */
    private function IsUpperCaseWord($word){
        return ctype_upper($word);
    }

    /** Lexicon valence of the word; only call after IsInLexicon(). */
    private function getValenceFromLexicon($word){
        return $this->lexicon[strtolower($word)];
    }

    /** The target word is always the last element of a context window. */
    private function getTargetWordFromContext($wordInContext){
        return $wordInContext[count($wordInContext)-1];
    }

    /**
     * Build the 4-element context window for the word at $currentWordPosition:
     * [word-3, word-2, word-1, word]. Positions before the start of the text
     * are filled with "".
     */
    private function getWordInContext($wordList,$currentWordPosition){
        $context = [$wordList[$currentWordPosition]];
        for($offset = 1; $offset <= 3; $offset++){
            $index = $currentWordPosition - $offset;
            array_unshift($context, $index >= 0 ? $wordList[$index] : "");
        }
        return $context;
    }

    /**
     * Return sentiment strength scores for the input text.
     * Positive values are positive valence, negative values are negative valence.
     *
     * @param string $text
     * @return array ["neg" => float, "neu" => float, "pos" => float, "compound" => float]
     */
    function getSentiment($text){
        $this->current_sentitext = new SentiText($text);
        $sentiments = [];
        $words_and_emoticons = $this->current_sentitext->words_and_emoticons;
        $wordCount = count($words_and_emoticons);

        // One valence per token so that indexes stay aligned with _but_check().
        for($i = 0; $i < $wordCount; $i++){
            $valence = 0.0;
            $wordBeingTested = $words_and_emoticons[$i];
            $nextWord = ($i + 1 < $wordCount) ? $words_and_emoticons[$i+1] : "";

            // Words outside the lexicon express no sentiment. "tipus" followed by
            // "de" is a dampening modifier, not a sentiment word, so it is skipped too.
            if($this->IsInLexicon($wordBeingTested) && !$this->IsKindOf($wordBeingTested, $nextWord)){
                $valence = $this->getValenceFromLexicon($wordBeingTested);
                $wordInContext = $this->getWordInContext($words_and_emoticons, $i);
                $valence = $this->adjustBoosterSentiment($wordInContext, $valence);
            }
            $sentiments[] = $valence;
        }

        // Once every word has a sentiment, re-weight around a contrastive conjunction.
        $sentiments = $this->_but_check($words_and_emoticons, $sentiments);

        return $this->score_valence($sentiments, $text);
    }

    /**
     * Push the value further from zero by C_INCR when the word is ALL CAPS
     * and the current text reports mixed capitalisation (is_cap_diff).
     */
    private function applyValenceCapsBoost($targetWord,$valence){
        if($this->IsUpperCaseWord($targetWord) && $this->current_sentitext->is_cap_diff){
            if($valence > 0){
                $valence += C_INCR;
            }else{
                $valence -= C_INCR;
            }
        }
        return $valence;
    }

    /**
     * Scalar contributed by a preceding word: 0.0 when it is not a booster,
     * otherwise its BOOSTER_DICT value, sign-flipped for negative valences and
     * caps-boosted when written in ALL CAPS.
     */
    private function boosterScaleAdjustment($word, $valence){
        if(!$this->IsBoosterWord($word)){
            return 0.0;
        }

        $scalar = $this->getBoosterScaler($word);
        if($valence < 0){
            $scalar *= -1;
        }
        // check if booster/dampener word is in ALLCAPS (while others aren't)
        return $this->applyValenceCapsBoost($word, $scalar);
    }

    /**
     * Dampen the scalar of a preceding word based on its distance from the
     * target word. $position is the index inside the context window
     * [word-3, word-2, word-1, word]: index 2 immediately precedes the target
     * and is left untouched, index 1 is scaled by 0.95 and index 0 by 0.9.
     */
    private function dampendBoosterScalerByPosition($booster,$position){
        if(0 == $booster){
            return $booster;
        }
        if(1 == $position){
            return $booster*0.95;
        }
        if(0 == $position){
            return $booster*0.9;
        }
        return $booster;
    }

    /** Apply the ALL-CAPS boost to the target word, then every context-based adjustment. */
    private function adjustBoosterSentiment($wordInContext,$valence){
        $targetWord = $this->getTargetWordFromContext($wordInContext);
        $valence = $this->applyValenceCapsBoost($targetWord, $valence);
        return $this->modifyValenceBasedOnContext($wordInContext, $valence);
    }

    /**
     * Adjust the valence using the three preceding words: boosters/dampeners,
     * then the "mai" pattern and negations, idioms, and the "least" check.
     */
    private function modifyValenceBasedOnContext($wordInContext,$valence){
        // Every element except the last (the target word itself) may act as a booster.
        $precedingCount = count($wordInContext) - 1;
        for($i = 0; $i < $precedingCount; $i++){
            $scalarValue = $this->boosterScaleAdjustment($wordInContext[$i], $valence);
            $scalarValue = $this->dampendBoosterScalerByPosition($scalarValue, $i);
            $valence = $valence + $scalarValue;
        }

        $valence = $this->_never_check($wordInContext, $valence);
        $valence = $this->_idioms_check($wordInContext, $valence);
        # future work: consider other sentiment-laden idioms
        $valence = $this->_least_check($wordInContext, $valence);

        return $valence;
    }

    /**
     * Negation case using the "least" equivalent: when the word right before
     * the target is "sense importància" and is not itself preceded by "a" or
     * "molt", the valence is multiplied by N_SCALAR.
     * NOTE(review): this only fires if the tokenizer can yield the two-word
     * token "sense importància" - confirm against SentiText.
     */
    function _least_check($wordInContext, $valence){
        if(strtolower($wordInContext[2]) === "sense importància"){
            $before = strtolower($wordInContext[1]);
            if($before !== "a" && $before !== "molt"){
                $valence = $valence*N_SCALAR;
            }
        }
        return $valence;
    }

    /**
     * Re-weight sentiments around the contrastive conjunction "pero"/"PERO":
     * values before it are halved, values after it are multiplied by 1.5.
     * NOTE(review): the accented spelling "però" is not matched here - confirm
     * whether the tokenizer strips accents.
     *
     * @param array $words_and_emoticons token list
     * @param array $sentiments one valence per token
     * @return array adjusted sentiments
     */
    function _but_check($words_and_emoticons, $sentiments){
        $bi = array_search("pero", $words_and_emoticons, true);
        if($bi === false){
            $bi = array_search("PERO", $words_and_emoticons, true);
        }
        // Index 0 is a valid hit, so only false means "not found".
        if($bi === false){
            return $sentiments;
        }

        $sentimentCount = count($sentiments);
        for($si = 0; $si < $sentimentCount; $si++){
            if($si < $bi){
                $sentiments[$si] = $sentiments[$si]*0.5;
            }elseif($si > $bi){
                $sentiments[$si] = $sentiments[$si]*1.5;
            }
        }
        return $sentiments;
    }

    /**
     * Replace the valence when the context window ends in a known idiom
     * (SPECIAL_CASE_IDIOMS), then apply the B_DECR dampener once if a booster
     * bi-gram such as "tipus de" precedes the target word.
     */
    function _idioms_check($wordInContext, $valence){
        $onezero     = sprintf("%s %s", $wordInContext[2], $wordInContext[3]);
        $twoonezero  = sprintf("%s %s %s", $wordInContext[1], $wordInContext[2], $wordInContext[3]);
        $twoone      = sprintf("%s %s", $wordInContext[1], $wordInContext[2]);
        $threetwoone = sprintf("%s %s %s", $wordInContext[0], $wordInContext[1], $wordInContext[2]);
        $threetwo    = sprintf("%s %s", $wordInContext[0], $wordInContext[1]);

        foreach([$onezero, $twoonezero, $twoone, $threetwoone, $threetwo] as $seq){
            // Look the value up with the same lower-cased key used for the existence test.
            $key = strtolower($seq);
            if(array_key_exists($key, SPECIAL_CASE_IDIOMS)){
                $valence = SPECIAL_CASE_IDIOMS[$key];
                break;
            }
        }
        // Idioms that start at the target word and look ahead are not implemented yet.

        // check for booster/dampener bi-grams such as 'sort of' or 'kind of'
        if($this->IsBoosterWord($threetwo) || $this->IsBoosterWord($twoone)){
            $valence = $valence+B_DECR;
        }
        return $valence;
    }

    /**
     * Handle "mai" ("never") followed by "aixi"/"este" ("so"/"this"), then negations.
     * "mai" three words back scales the valence by 1.25; otherwise "mai" two
     * words back scales it by 1.5. Without "mai" the valence is left untouched.
     * Afterwards every negation word in the window multiplies it by B_DECR.
     */
    function _never_check($wordInContext,$valance){
        $neverModifier = null;
        if("mai" == $wordInContext[0]){
            $neverModifier = 1.25;
        }else if("mai" == $wordInContext[1]){
            $neverModifier = 1.5;
        }

        $followedBySoOrThis = "aixi" == $wordInContext[1] || "aixi" == $wordInContext[2]
            || "este" == $wordInContext[1] || "este" == $wordInContext[2];
        if($neverModifier !== null && $followedBySoOrThis){
            $valance *= $neverModifier;
        }

        // if any of the words in context are negated words apply negative scaler
        foreach($wordInContext as $wordToCheck){
            if($this->IsNegated($wordToCheck)){
                $valance *= B_DECR;
            }
        }
        return $valance;
    }

    /**
     * Emphasis added by exclamation points and question marks in the text.
     * $sum_s is accepted for interface compatibility but not used.
     */
    function _punctuation_emphasis($sum_s, $text){
        return $this->_amplify_ep($text) + $this->_amplify_qm($text);
    }

    /** 0.292 per exclamation point, counting at most 4 of them. */
    function _amplify_ep($text){
        $ep_count = min(substr_count($text, "!"), 4);
        # (empirically derived mean sentiment intensity rating increase for exclamation points)
        return $ep_count*0.292;
    }

    /** 0 for fewer than two question marks, 0.18 each for 2 or 3, 0.96 for more. */
    function _amplify_qm($text){
        $qm_count = substr_count($text, "?");
        if($qm_count <= 1){
            return 0;
        }
        if($qm_count <= 3){
            # (empirically derived mean sentiment intensity rating increase for question marks)
            return $qm_count*0.18;
        }
        return 0.96;
    }

    /**
     * Split sentiments into a positive sum, a negative sum and a neutral count.
     * Each non-zero score is pushed one unit further from zero to compensate
     * for neutral words being counted as 1.
     *
     * @return array [$pos_sum, $neg_sum, $neu_count]
     */
    function _sift_sentiment_scores($sentiments){
        $pos_sum = 0.0;
        $neg_sum = 0.0;
        $neu_count = 0;
        foreach($sentiments as $sentiment_score){
            if($sentiment_score > 0){
                $pos_sum += $sentiment_score + 1;
            }
            if($sentiment_score < 0){
                $neg_sum += $sentiment_score - 1;
            }
            if($sentiment_score == 0){
                $neu_count += 1;
            }
        }
        return [$pos_sum, $neg_sum, $neu_count];
    }

    /**
     * Turn per-word sentiments into the final score dictionary.
     *
     * @param array  $sentiments one valence per token
     * @param string $text original text, used for punctuation emphasis
     * @return array ["neg", "neu", "pos"] rounded to 3 decimals, "compound" to 4;
     *               all zeros when $sentiments is empty
     */
    function score_valence($sentiments, $text){
        if($sentiments){
            $sum_s = array_sum($sentiments);
            # compute and add emphasis from punctuation in text
            $punct_emph_amplifier = $this->_punctuation_emphasis($sum_s, $text);
            if($sum_s > 0){
                $sum_s += $punct_emph_amplifier;
            }elseif($sum_s < 0){
                $sum_s -= $punct_emph_amplifier;
            }

            $compound = normalize($sum_s);

            # discriminate between positive, negative and neutral sentiment scores
            list($pos_sum, $neg_sum, $neu_count) = $this->_sift_sentiment_scores($sentiments);

            // The punctuation emphasis goes to whichever polarity dominates.
            if($pos_sum > abs($neg_sum)){
                $pos_sum += $punct_emph_amplifier;
            }elseif($pos_sum < abs($neg_sum)){
                $neg_sum -= $punct_emph_amplifier;
            }

            $total = $pos_sum + abs($neg_sum) + $neu_count;
            $pos = abs($pos_sum / $total);
            $neg = abs($neg_sum / $total);
            $neu = abs($neu_count / $total);
        }else{
            $compound = 0.0;
            $pos = 0.0;
            $neg = 0.0;
            $neu = 0.0;
        }

        return [
            "neg" => round($neg, 3),
            "neu" => round($neu, 3),
            "pos" => round($pos, 3),
            "compound" => round($compound, 4),
        ];
    }
}