<?php

// Terminate `string' with exactly three dots.  Dots that are already
// present at the end are stripped first, so the result never ends in
// more than three of them (`foo.' and `foo.....' both give `foo...').
function append_ellipsis ($string)
{
  $without_trailing_dots = rtrim ($string, '.');

  return sprintf ('%s...', $without_trailing_dots);
}


// Shorten `url' for display so that it fits into `limit' bytes where
// possible.
//
// A slash directly before the query string (or at the very end of
// the URL) is always dropped first.  If the result is still too long,
// the following is tried in order, returning as soon as the text
// fits:
//
//   1. truncate the query string, or drop it altogether;
//   2. truncate the path (the directories between domain and page);
//   3. drop the scheme, keeping the whole path if that is enough and
//      a truncated one otherwise;
//   4. truncate the domain name and drop the path;
//   5. use only the page (or only the domain if there is no page).
//
// URLs that don't look like `scheme://domain/...' and anything that
// still doesn't fit are simply cut off and given a trailing ellipsis.
//
// Lengths are counted in bytes (strlen/substr).  Returns the shortened
// string; note that for limits below 3 the final fallback still
// yields the three-byte `...'.
function ellipsize_url ($url, $limit = 40)
{
  $query_position = strpos ($url, '?');
  if ($query_position === FALSE)
    $query_position = strlen ($url);

  // Only look at the preceding character if there is one: for an
  // empty string or an URL starting with `?' the index would be -1,
  // which either warns or reads the *last* byte of the string.
  if ($query_position > 0 && $url[$query_position - 1] === '/')
    {
      // Ending slash is substituted automatically, it is not needed
      // in text representation of an URL.

      $url = substr ($url, 0, $query_position - 1) . substr ($url, $query_position);
    }

  // Check if we need to ellipsize it at all.
  if (strlen ($url) <= $limit)
    return $url;

  static $pattern = '#^([a-z]+://)([^/?]+/?)([^?]*/)?([^/?]*)(\?.*)?$#';

  $url_parts = array ();
  if (preg_match ($pattern, $url, $url_parts))
    {
      $scheme = $url_parts[1];
      $domain = $url_parts[2];
      $path   = $url_parts[3];
      $page   = $url_parts[4];

      // preg_match() leaves out trailing groups that did not take
      // part in the match, so the query entry is absent for URLs
      // without a query string.
      $query  = isset ($url_parts[5]) ? $url_parts[5] : '';

      $length = strlen ($url) - strlen ($query);

      // First try to truncate query, else drop it altogether.

      $reserve = $limit - $length;
      if ($reserve >= 0)
        {
          if ($reserve >= 3)
            $query = append_ellipsis (substr ($query, 0, $reserve - 3));
          else
            $query = '';

          return "$scheme$domain$path$page$query";
        }

      // Next is the path: part of URL after the domain name, but
      // before the filename (or last directory name.)  From here on
      // `length' counts a long path as the 4 bytes of `.../'; the
      // original `path' itself is left untouched so that later steps
      // can still use it.

      if (strlen ($path) > 4)
        {
          $length  -= strlen ($path) - 4;
          $reserve  = $limit - $length;

          if ($reserve >= 0)
            {
              $short_path = append_ellipsis (substr ($path, 0, $reserve)) . '/';
              return "$scheme$domain$short_path$page";
            }
        }

      // Now try removing scheme altogether.  That frees bytes, so the
      // reserve has to be recomputed from the new length.

      $length  -= strlen ($scheme);
      $reserve  = $limit - $length;

      if (strlen ($domain) + strlen ($path) + strlen ($page) <= $limit)
        return "$domain$path$page";

      if ($reserve >= 0)
        {
          // Only reachable for a long path: keep as much of its
          // beginning as fits in front of the `.../'.
          $short_path = append_ellipsis (substr ($path, 0, $reserve)) . '/';
          return "$domain$short_path$page";
        }

      if (strlen ($page) > 0)
        {
          // Well, we can try truncating domain name.  The path is
          // dropped and the domain ends in `.../' (4 bytes), the rest
          // of the limit goes to the beginning of the domain.

          $reserve = $limit - (4 + strlen ($page));

          if ($reserve >= 0)
            {
              $domain = append_ellipsis (substr ($domain, 0, $reserve)) . '/';
              return "$domain$page";
            }
        }

      // Uh, one last try.

      $url = (strlen ($page) > 0 ? $page : $domain);
      if (strlen ($url) <= $limit)
        return $url;

      // Use fallback.
    }

  // Fallback: either the URL hasn't matched our regular expression
  // for some reason or clever ellipsizing above didn't work.  The
  // length is clamped at zero because a negative one would make
  // substr() cut from the end instead of keeping a prefix.
  return append_ellipsis (substr ($url, 0, max (0, $limit - 3)));
}


// Demo helper: print `url' on one line and, indented below it, the
// result of ellipsize_url() together with its byte length and the
// requested limit, e.g. `  => http://short.url (16/40)'.
function test ($url, $limit = 40)
{
  $shortened = ellipsize_url ($url, $limit);
  $report    = "$url\n"
             . "  => $shortened (" . strlen ($shortened) . "/$limit)\n";

  echo $report;
}


// Run the demo over every sample URL with the default limit: first
// the plain URLs, then the same URLs with a query string attached.
array_map ('test', array (
  "http://test.org/some/long/path/",
  "http://test.org/some/exceptionally/long/path/",
  "http://test.org/some/long/path/the-page.html",
  "http://test.bogus.org/some/long/path/the-page.html",
  "http://test.bogus.org/some/exceptionally/long/path/long-page-name.html",
  "http://test.bogus.org/long-page-name-at-the-root.html",
  "http://short.url/",
  "http://not.so.short.an.url/",
  "http://a.veeeeeeeeeeeery.long.domain.name",

  "http://test.org/some/long/path/?teh=long-query",
  "http://test.org/some/exceptionally/long/path/?teh=long-query",
  "http://test.org/some/long/path/the-page.html?teh=long-query",
  "http://test.bogus.org/some/long/path/the-page.html?teh=long-query",
  "http://test.bogus.org/some/exceptionally/long/path/long-page-name.html?teh=long-query",
  "http://test.bogus.org/long-page-name-at-the-root.html?teh=long-query",
  "http://short.url/?teh=long-query",
  "http://not.so.short.an.url/?teh=long-query",
  "http://a.veeeeeeeeeeeery.long.domain.name?teh=long-query",
));

?>
