6.0.0-git
2024-04-16

Diff for ConvertSerializedDataToUTF8 between 2 and 3

+ How to convert Serialized Data to UTF-8

The following script was used at the University of Tübingen, Germany, to convert the serialized data from ISO-8859-15 to UTF-8, 
in the process of migrating from horde 3 to horde 5. As it was not intended to be used at other sites, it is tailored to
the environment of the University of Tübingen. 

* '''THIS SCRIPT COMES WITH NO WARENTY'''
* '''BACKUP YOUR DATABASE '''
 
This script expects that the database is converted to utf-8 already, and will fix the serialization of non-ASCII data

<code>
#!/usr/bin/php
# Copyright: 2015  Michael Menge michael.menge(at)uni-tuebingen(dot)de
# http://www.gnu.org/licenses/gpl
<?php

define('LOG_LEVEL', LOG_INFO);

$errors = array(LOG_ERR=>0, LOG_WARNING=>0, LOG_NOTICE=>0, LOG_INFO=>0, LOG_DEBUG=>0);


function printLog ($level, $string) {
    global $errors;
    $errors[$level]++;
    if ($level <= LOG_LEVEL) {
        echo $string;
    }
}

$ar=array("horde_alarms"=>array("cols" => array("alarm_internal","alarm_methods","alarm_params")),
"turba_objects"=>array("key" => "object_id", "cols" => array("object_members")),
"turba_shares"=>array("key" => "share_id", "cols" => array("attribute_params")),
"kronolith_events"=>array("key" => "event_id", "cols" => array("event_attendees")),
"ingo_rules"=>array("key" => "rule_id", "cols" => array("rule_conditions")),
"horde_histories"=>array("key"=>"history_id", "cols" => array("history_extra")),
"horde_prefs"=>array("cols" => array("pref_value")));


printLog(LOG_INFO, mb_internal_encoding()."\n");
mb_internal_encoding("UTF-8");
printLog(LOG_INFO, mb_internal_encoding()."\n");
$starttime = time();
$horde_db="host=HOSTNAME dbname=horde user=horde password=XXXXX";

$dbconn = pg_connect($horde_db)
          or die('Verbindungsaufbau fehlgeschlagen: ' . pg_last_error());

foreach ($ar as $table=>$tabledata) {
    $tablekey="";
    if (!isset($tabledata["key"])) {
        $query="CREATE SEQUENCE zdv_encoding_".$table."_seq; ";
        $query.="ALTER TABLE ".$table." ADD COLUMN zdv_encoding_id INTEGER NOT NULL DEFAULT nextval('zdv_encoding_".$table."_seq'); ";
        $query.="CREATE UNIQUE INDEX zdv_encoding_id_idx ON ".$table." (zdv_encoding_id);";
        printLog(LOG_NOTICE, "Sec: ".(time()-$starttime).": before ALTER TABLE $table ADD index COLUMN\n");
        $result = pg_query($query) or die('Abfrage fehlgeschlagen: ' . pg_last_error());
        printLog(LOG_NOTICE, "Sec: ".(time()-$starttime).": after ALTER TABLE $table ADD index COLUMN\n");
        $tablekey="zdv_encoding_id";
    } else {
        printLog(LOG_NOTICE, "Sec: ".(time()-$starttime).": key vorhanden\n");
        $tablekey=$tabledata["key"];
    }
    if (isset($tabledata["cols"])) {
        $cols=$tabledata["cols"];
        foreach ($cols as $col) {
            $convert_error=0;
            $convert_failed=0;
            $convert_converted=0;
            $convert_already=0;
            $convert_ascii=0;
            $my_stmname="my_update_".$table."_".$col;
            pg_prepare($my_stmname, "UPDATE ".$table." SET ".$col."=$1 WHERE ".$tablekey."=$2");
            $query = "SELECT ".$tablekey.", ".$col." FROM ".$table." where ".$col." like 'a:%:{%';";
            $result = pg_query($query) or die('Abfrage fehlgeschlagen: ' . pg_last_error());
            printLog(LOG_NOTICE, "Sec: ".(time()-$starttime).": before UPDATE $table:$col\n");
            while ($line = pg_fetch_array($result, null, PGSQL_ASSOC)) {
                $value=$line[$col];
                $id=$line[$tablekey];
                if (!mb_check_encoding($value,"ASCII")) {
                    $changed_value = mb_convert_encoding ( $value , "ISO-8859-15");
                    $unserial_value=@unserialize($changed_value);
                    if ($unserial_value != FALSE) {
                        mb_convert_variables("UTF-8", "ISO-8859-15", $unserial_value);
                        $converted_value=serialize($unserial_value);
                        printLog(LOG_INFO, "Convert $table:$col ID=$id\n");
                        printLog(LOG_DEBUG, "FROM=$value TO=$converted_value\n");
                        $up_result = pg_execute($dbconn, $my_stmname, array($converted_value, $id));
                        if (!$up_result) {
                            printLog(LOG_ERR, "ERROR WITH QUERY: $query\n");
                            $convert_error++;
                        } else {
                            $convert_converted++;
                        }
                    } else {
                        $unserial_value=@unserialize($value);
                        if ($unserial_value == FALSE) {
                            printLog(LOG_ERR, "FAILED: Unserialize ISO and UTF-8 $table:$col VALUE=$value\n");
                            $convert_failed++;
                        } else {
                            printLog(LOG_INFO, "ALREADY Converted: $table:$col  ID=$id\n");
                            printLog(LOG_DEBUG, "VALUE=$value\n");
                            $convert_already++;
                        }
                    }
                } else {
                    printLog(LOG_DEBUG, "ASCII only $table:$col ID=$id\n");
                    $convert_ascii++;
                }
            }
            printLog(LOG_NOTICE, "--------\nConverted   : $convert_converted\nASCII         : $convert_ascii\nAlready con     : $convert_already\nFailed : $convert_failed\nFehler        : $convert_error\n");
            printLog(LOG_NOTICE, "Sec: ".(time()-$starttime).": after UPDATE $table:$col\n");
        }
    } else {
        printLog(LOG_WARNING, "No Cols for Table $table found\n");
    }
    if (!isset($tabledata["key"])) {
        $query="DROP INDEX zdv_encoding_id_idx; ";
        $query.="ALTER TABLE ".$table." DROP COLUMN zdv_encoding_id; ";
        $query.="DROP SEQUENCE zdv_encoding_".$table."_seq; ";
        printLog(LOG_NOTICE, "Sec: ".(time()-$starttime).": before ALTER TABLE $table DROP COLUMN\n");
        $result = pg_query($query) or die('Abfrage fehlgeschlagen: ' . pg_last_error());
        printLog(LOG_NOTICE, "Sec: ".(time()-$starttime).": after ALTER TABLE $table DROP COLUMN\n");
    }
}
printLog(LOG_NOTICE, "Sec: ".(time()-$starttime).": after Convert\n");
printLog(LOG_NOTICE, "Errors: ".$errors[LOG_ERR]."\nWarning: ".$errors[LOG_WARNING]."\n");

?>
</code>