SEF URLs

I've been approached by @RVR with a request to provide some input on the possibility of rewriting Elgg URLs into search-engine-friendly ones.

Thanks to input from @Steve Clay, I've managed to put together a basic plugin that should help achieve this. All URLs with GUIDs are routed to the /seo/ page handler, which can then be easily rewritten via .htaccess.

I welcome your input so that I can wrap this up and release it as a plugin. Obviously, there is a need for some more hooks to override getURL methods, as well as for some back-end customization facility:

 

<?php

/* hypeSEO
 *
 * @package hypeJunction
 * @subpackage hypeSEO
 *
 * @author Ismayil Khayredinov <ismayil.khayredinov@gmail.com>
 * @copyright Copyright (c) 2011, Ismayil Khayredinov
 */

// Register hj_seo_init() as the handler for the 'init', 'system' event.
elgg_register_event_handler('init', 'system', 'hj_seo_init');

/**
 * Initialize hypeSEO.
 *
 * Verifies that the hypeFramework plugin is active, then registers the
 * 'route' plugin hook that rewrites URLs and the 'seo' page handler that
 * serves the rewritten URLs.
 *
 * @return void
 */
function hj_seo_init() {

	$plugin = 'hypeSEO';

	if (!elgg_is_active_plugin('hypeFramework')) {
		register_error(elgg_echo('hj:framework:disabled', array($plugin, $plugin)));
		disable_plugin($plugin);
		// Stop here: the hj_framework_* helper called below is presumably
		// provided by hypeFramework and would be undefined without it, and a
		// plugin that has just been disabled should not register its handlers.
		return;
	}

	// NOTE(review): the return value was never used; the call is retained in
	// case it has side effects inside hypeFramework - confirm and drop if not.
	hj_framework_path_shortcuts($plugin);

	elgg_register_plugin_hook_handler('route', 'all', 'hj_seo_reroute');
	elgg_register_page_handler('seo', 'hj_sef_page_handler');
}

/**
 * 'route' plugin hook handler: redirect a regular URL to its /seo/ equivalent.
 *
 * The handler name and URL segments are read from $return. Requests for the
 * handlers listed in $handler_exceptions are left untouched. For any other
 * request, numeric segments that resolve to an entity are replaced with
 * "<guid>-<friendly title>", the result is prefixed with "seo/<handler>", and
 * the mapping is stored in the datalist under the key
 * "sef:<handler>/<original segments>" (truncated to 255 characters) so that
 * hj_sef_page_handler() can later find the original page again.
 *
 * @param string $hook   Hook name (unused)
 * @param string $type   Hook type (unused)
 * @param mixed  $return Route description; 'handler' and 'segments' are read from it
 * @param mixed  $params Hook parameters (unused)
 *
 * @return mixed The unmodified $return when the request is not rewritten;
 *               otherwise the request is handed to forward()
 */
function hj_seo_reroute($hook, $type, $return, $params) {

	$page = elgg_extract('segments', $return, false);
	$handler = elgg_extract('handler', $return, '');

	// Nothing to rewrite without a non-empty list of segments.
	if (empty($page) || !is_array($page)) {
		return $return;
	}

	$handler_exceptions = array(
		'action',
		'cache',
		'services',
		'export',
		'mt',
		'xml-rpc',
		'rewrite',
		'tag',
		'pg',
		'admin',
		'cron',
		'js',
		'css',
		'ajax',
		'livesearch',
		'activity',
		'setting',
		'friends',
		'friendsof',
		'register',
		'forgotpassword',
		'resetpassword',
		'login',
		'avatar',
		'profile',
		'collections',
		'seo'
	);

	if (in_array($handler, $handler_exceptions, true)) {
		return $return;
	}

	$nonsef_page = implode('/', $page);
	$nonsef_datalist = "sef:{$handler}/{$nonsef_page}";
	// The key is capped at 255 characters, as in the original implementation.
	if (strlen($nonsef_datalist) > 255) {
		$nonsef_datalist = substr($nonsef_datalist, 0, 255);
	}

	// Reuse a previously stored mapping when there is one.
	if ($new_url = datalist_get($nonsef_datalist)) {
		forward($new_url);
	}

	// Must start as an array: it is searched with in_array() before the
	// first element is appended.
	$new_page = array();

	foreach ($page as $segment) {
		if (is_numeric($segment)) {
			$entity = get_entity((int) $segment);
			if (elgg_instanceof($entity)) {
				if (!$title = $entity->title) {
					$title = $entity->name;
				}
				$friendly = elgg_get_friendly_title($title);
				$new_page[] = "$entity->guid-$friendly";
			} else {
				// Keep numeric segments that are not entity GUIDs; dropping
				// them would let different URLs collapse to the same SEF
				// path and make the reverse lookup ambiguous.
				$new_page[] = $segment;
			}
		} elseif (!in_array($segment, $new_page, true)) {
			// Non-numeric segments are added once; repeats are skipped.
			$new_page[] = $segment;
		}
	}

	if (empty($new_page)) {
		return $return;
	}

	array_unshift($new_page, $handler);
	array_unshift($new_page, 'seo');
	$new_page = implode('/', $new_page);

	datalist_set($nonsef_datalist, $new_page);

	forward($new_page);
}

/**
 * Page handler for /seo/ URLs.
 *
 * Rebuilds the SEF path from the URL segments, looks up the datalist entry
 * whose value equals that path, strips the leading "sef:" marker from the
 * entry's name to recover "<handler>/<segments>", and dispatches to the page
 * handler registered for <handler> in $CONFIG->pagehandler.
 *
 * @param array $sef_datalist URL segments following "seo/"
 *
 * @return mixed The result of the original page handler, or false when the
 *               SEF path is unknown or no handler is registered for it
 */
function hj_sef_page_handler($sef_datalist) {
	global $CONFIG;

	// The path is not truncated here: hj_seo_reroute() stores the complete
	// SEF path as the datalist value, so a shortened string would never match.
	$sef_path = 'seo/' . implode('/', $sef_datalist);

	$nonsef_page = hj_seo_get_datalist($sef_path);
	if (!$nonsef_page) {
		// Unknown SEF URL: there is nothing to dispatch to.
		return false;
	}

	// Strip only the leading "sef:" marker, not occurrences inside the path.
	if (strpos($nonsef_page, 'sef:') === 0) {
		$nonsef_page = substr($nonsef_page, 4);
	}

	$page = explode('/', $nonsef_page);
	$handler = array_shift($page);

	if (empty($handler) || !isset($CONFIG->pagehandler[$handler])) {
		return false;
	}

	// Run the original handler under its own context name.
	elgg_set_context($handler);

	$function = $CONFIG->pagehandler[$handler];
	return call_user_func($function, $page, $handler);
}


/**
 * Reverse datalist lookup: return the name of the datalist row whose value
 * equals $value.
 *
 * Results are cached per request in $DATALIST_VALUE_CACHE and, when memcache
 * is available, in the 'datalist_value_memcache' namespace. Both caches are
 * keyed by the raw (unescaped) value; the escaped form is used only inside
 * the SQL statement, so values containing characters altered by escaping are
 * still found and cached correctly.
 *
 * @param string $value Datalist value to search for
 *
 * @return string|null The matching datalist name, or null when none is found
 */
function hj_seo_get_datalist($value) {
	global $DATALIST_VALUE_CACHE;

	$value = trim($value);

	if (isset($DATALIST_VALUE_CACHE[$value])) {
		return $DATALIST_VALUE_CACHE[$value];
	}

	// If memcache is enabled, try it before touching the database.
	static $datalist_value_memcache;
	if ((!$datalist_value_memcache) && (is_memcache_available())) {
		$datalist_value_memcache = new ElggMemcache('datalist_value_memcache');
	}
	if ($datalist_value_memcache) {
		$name = $datalist_value_memcache->load($value);
		if ($name) {
			return $name;
		}
	}

	$dbprefix = elgg_get_config('dbprefix');

	// Escape for SQL only; cache keys and comparisons use the raw value.
	$escaped = sanitise_string($value);
	$result = get_data("SELECT name, value FROM {$dbprefix}datalists WHERE value = '{$escaped}'");

	if ($result) {
		foreach ($result as $row) {
			$DATALIST_VALUE_CACHE[$row->value] = $row->name;

			// Cache it if memcache is available
			if ($datalist_value_memcache) {
				$datalist_value_memcache->save($row->value, $row->name);
			}
		}

		if (isset($DATALIST_VALUE_CACHE[$value])) {
			return $DATALIST_VALUE_CACHE[$value];
		}
	}

	return null;
}