>num) and duplicate(md5) detection.
# It doesn't know about bans, so those need to be done separately, but it
# doesn't care if that is before or after.
# It really should run after valid file checks (jpg/png/gif, >0x0, etc) but that's not critical.
#
# Synchronization: There is (in theory) a minor race condition because the tables are not locked.
# It's not exploitable for any useful purpose, and it's blocked by the floodcheck
#
# Changelog:
# 2008/02/20 04:20: Added changelog, fixed $txt error that killed all posts
# 2008/02/20 04:56: Fixed the signal-ratio filter to handle the stupid HTML
# 2008/02/20 05:21: Added a check for repeated characters
# 2008/02/20 07:05: Added a check for long spams
# 2008/02/20 13:02: Rearranged the filters for better results.
# 2008/02/20 23:58: Fixed a bug that broke posts with two quotes far apart
# 2008/02/21 01:57: Fixed a dumb bug with the number filter..
# 2008/02/21 02:06: Adding content-percentage info to the content filter.
# 2008/02/21 02:15: Adjusted long-text filter.
# 2008/02/21 02:18: Removed long-text filter.
# 2008/02/22 16:43: Added mute-expiring.
# 2008/02/22 17:54: Fixed mute-expiring.
# 2008/02/22 18:13: Added #nextnow and #muteinfo secret mod capcodes
# 2008/02/22 18:21: Fixed #muteinfo for mods.
# 2015/10/24 16:36: Cleanup the code and put the robot back.
#                   $email, $sub, $name fields aren't used anymore.
#                   removed $mod parameter.
# 2020/11/16 08:09: Update text hashes for every post to prune stale entries

// Tunables
define('R9K_SIGNAL_RATIO', 0.1);
define('R9K_MAX_DURATION', 31536000); // one year
define('R9K_DATE_FORMAT', '%m/%d/%y %H:%M:%S');
define('R9K_DEMUTE_PERIOD', 86400); // one day
define('R9K_SNR_MIN_LEN', 10); // minimum txt length for signal ratio check

// Status / error messages returned by r9k_process()
define('R9K_OK', 'OK');
define('R9K_DB_ERROR', 'Database error.');
define('R9K_EMPTY_COM', 'Textless posts are not allowed.');
define('R9K_ASCII_ONLY', 'Non-ASCII text is not allowed.');
define('R9K_MUTED', "You're muted! 
You cannot post until %s, %s from now");
define('R9K_MUTE_ERROR', "You have been muted for %s, because %s");

// Mute reasons (substituted into R9K_MUTE_ERROR)
define('R9K_LOW_SNR', 'your comment was too low in content (%0.2f%% content).');
define('R9K_DUP_TXT', 'your comment was not original.');
define('R9K_DUP_IMG', 'your image was not original.');

/**
 * Runs the originality filter on a comment.
 *
 * The comment is normalised (lower-cased, markup/quotelinks/entities and
 * non-alphanumerics removed, repeated characters compressed), checked for a
 * minimum content ratio and then looked up by md5 in the posts table.
 *
 * @param string $com Comment text (may contain HTML).
 * @param string $md5 File md5. Only the "empty file" hash is normalised to
 *                    null here; the image duplicate check is commented out.
 * @param mixed  $ip  Poster IP, cast to int before being used in queries.
 *
 * @return string R9K_OK when the post passes, otherwise one of the R9K_*
 *                error strings visible below. NOTE(review): the branch that
 *                presumably returns R9K_MUTE_ERROR is not visible in this
 *                copy of the file (see the notes inside the function).
 */
function r9k_process($com, $md5, $ip) {
  // Blank file
  if ($md5 == 'd41d8cd98f00b204e9800998ecf8427e') {
    $md5 = null;
  }
  
  if ($com === ''){
    return R9K_EMPTY_COM;
  }
  
  if (preg_match('/[\\x80-\\xFF]/', $com)) {
    return R9K_ASCII_ONLY;
  }
  
  $table_mutes = ROBOT9000_MUTES;
  $table_posts = ROBOT9000_POSTS;
  
  $ip = (int)$ip;
  
  $mute = false;
  $demute = false;
  $timeout_power = 0;
  
  // NOTE(review): the text between "<<" and "$now)" on the next line is
  // missing from this copy of the file (a heredoc query and the code that
  // fetches $row / $now appear to have been stripped). It is kept verbatim;
  // restore it from version control before deploying.
  $query = << $now) {
      $duration = r9k_pretty_duration($row['mute_until'] - $now);
      $when = strftime(R9K_DATE_FORMAT, $row['mute_until']);
      return sprintf(R9K_MUTED, $when, $duration);
    }
    
    if ($row['next_expire'] < $now){
      $demute = true;
    }
  }
  
  $txt = strtolower($com);
  
  // Strip HTML
  $stxt=preg_replace('/<.*?>/s','', $txt);
  
  // Original byte length
  $olength = strlen($stxt);
  
  // Strip >>123 quotelinks
  $stxt = preg_replace('/>>\d+/', '', $stxt);
  
  // Strip html entities
  $stxt = preg_replace('/&#?\w+;/', '', $stxt);
  
  // Strip non-alnum chars
  $stxt = preg_replace('/[^a-z\d-]+/', '', $stxt);
  
  // Trim leading and trailing numeric characters
  // NOTE(review): (.*) is greedy, so trailing digits are never actually
  // removed. Left unchanged on purpose: altering the normalisation would
  // change the hashes compared against rows already stored in the table.
  $stxt = preg_replace('/^\d*(.*)\d*$/', '\1', $stxt);
  
  // Compress repeated characters: aaa -> a
  $stxt = preg_replace('/(.)\\1{2,}/', '\\1', $stxt);
  
  // Check signal ratio
  if (strlen($txt) > R9K_SNR_MIN_LEN) {
    // A comment made only of markup has $olength === 0; treat it as 0%
    // content instead of dividing by zero.
    $ratio = $olength > 0 ? strlen($stxt) / $olength : 0.0;
    
    if ($ratio < R9K_SIGNAL_RATIO) {
      $mute = sprintf(R9K_LOW_SNR, $ratio * 100.0);
    }
  }
  
  if ($mute === false) {
    $txt_hash = md5($stxt);
    
    // Check if hashes match
    $query = "SELECT text, image FROM `$table_posts` WHERE text = '%s'";
    /*
    if ($md5) {
      $query .= " OR image = '%s'";
      $res = mysql_board_call($query, $txt_hash, $md5);
    }
    else {*/
      $res = mysql_board_call($query, $txt_hash);
    //}
    
    if (!$res) {
      //return R9K_OK;
      return R9K_DB_ERROR;
    }
    
    // Post is good. Insert hashes.
    if (mysql_num_rows($res) < 1) {
      $query = "INSERT INTO `$table_posts` (text) VALUES('%s')";
      mysql_board_call($query, $txt_hash);
    }
    // Duplicates found.
    else {
      //$row = mysql_fetch_assoc($res);
      
      //if ($row['text'] === $txt_hash) {
        $mute = R9K_DUP_TXT;
      //}
      //else if ($md5 && $row['image'] === $md5) {
      //  $mute = R9K_DUP_IMG;
      //}
      
      // Update the hash with a new timestamp
      $query = "UPDATE `$table_posts` SET created_on = NOW() WHERE text = '%s' LIMIT 1";
      mysql_board_call($query, $txt_hash);
    }
  }
  
  // Muted
  if ($mute !== false) {
    // Each offence doubles the mute length (2^timeout_power seconds),
    // capped at R9K_MAX_DURATION.
    ++$timeout_power;
    
    $mute_duration = pow(2, $timeout_power);
    
    if ($mute_duration > R9K_MAX_DURATION) {
      $timeout_power--;
      $mute_duration = R9K_MAX_DURATION;
    }
    
    $next_expire = R9K_DEMUTE_PERIOD;
    
    // NOTE(review): as above, the text between "<<" and "0, timeout_power"
    // is missing from this copy of the file (the mute query, presumably the
    // R9K_MUTE_ERROR return and the start of the de-mute branch). Kept
    // verbatim; restore from version control.
    $query = << 0, timeout_power - 1, 0), next_expire = DATE_ADD(NOW(), INTERVAL $next_expire SECOND) WHERE ip = $ip SQL;
    
    $res = mysql_board_call($query);
  }
  
  return R9K_OK;
}
}

/**
 * Formats a duration in seconds as "W weeks D days H hours M minutes S seconds",
 * omitting zero-valued units and pluralising each unit as needed.
 *
 * @param int|float|string $secs Duration in seconds. Fractions are truncated
 *                               and negative values are treated as 0.
 *
 * @return string Human readable duration; "0 seconds" when nothing remains.
 */
function r9k_pretty_duration($secs){
  // Normalise once so every unit is derived from the same whole, non-negative value.
  $secs = max(0, (int)$secs);
  
  $units = array(
    array((int)($secs / 604800), 'week'),
    array((int)($secs / 86400) % 7, 'day'),
    array((int)($secs / 3600) % 24, 'hour'),
    array((int)($secs / 60) % 60, 'minute'),
    array($secs % 60, 'second')
  );
  
  $out = array();
  
  foreach ($units as $unit) {
    list($count, $label) = $unit;
    
    if ($count !== 0) {
      $out[] = $count . ' ' . $label . ($count === 1 ? '' : 's');
    }
  }
  
  // Never hand an empty string to the message templates.
  if (!$out) {
    return '0 seconds';
  }
  
  return implode(' ', $out);
}