mergeduplicates.php 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. #!/usr/bin/env php
  2. <?php
  3. namespace Sabre\VObject;
  // Composer's autoloader sits in a different place depending on how vobject
  // was installed, so we probe the known locations and load the first hit.
  $autoloadCandidates = [
      __DIR__.'/../vendor/autoload.php',    // vobject was cloned directly
      __DIR__.'/../../../autoload.php',     // vobject is a composer dependency
  ];

  foreach ($autoloadCandidates as $candidate) {
      if (!file_exists($candidate)) {
          continue;
      }

      include $candidate;
      break;
  }

  // Without a working autoloader none of the vobject classes can be used.
  if (false === class_exists('Sabre\\VObject\\Version')) {
      fwrite(STDERR, "Composer autoloader could not be loaded.\n");
      exit(1);
  }
  // Banner, argument validation and stream setup for the merge run.
  echo 'sabre/vobject ', Version::VERSION, " duplicate contact merge tool\n";

  if ($argc < 3) {
      echo "\n";
      echo 'Usage: ', $argv[0], " input.vcf output.vcf [debug.log]\n";
      exit(1);
  }

  // Opening the output with mode 'w' truncates it. If it is the same file as
  // the input, the contacts would be wiped before they are read, so refuse.
  $inputRealPath = realpath($argv[1]);
  if (false !== $inputRealPath && $inputRealPath === realpath($argv[2])) {
      fwrite(STDERR, "Input and output must be different files.\n");
      exit(1);
  }

  // fopen() returns false on failure; stop here rather than handing an
  // invalid stream to the splitter or to fwrite() later on.
  $input = fopen($argv[1], 'r');
  if (false === $input) {
      fwrite(STDERR, "Could not open input file '".$argv[1]."' for reading.\n");
      exit(1);
  }

  $output = fopen($argv[2], 'w');
  if (false === $output) {
      fwrite(STDERR, "Could not open output file '".$argv[2]."' for writing.\n");
      exit(1);
  }

  // The debug log is optional; $debug stays null when no third argument is given.
  $debug = null;
  if (isset($argv[3])) {
      $debug = fopen($argv[3], 'w');
      if (false === $debug) {
          fwrite(STDERR, "Could not open debug log '".$argv[3]."' for writing.\n");
          exit(1);
      }
  }

  $splitter = new Splitter\VCard($input);
  // Properties listed here are skipped when two vcards are compared: a
  // difference in (or absence of) any of them is not treated as a mismatch.
  $ignoredProperties = ['PRODID', 'VERSION', 'REV', 'UID', 'X-ABLABEL'];

  // Vcards kept so far, indexed by their FN value.
  $collectedNames = [];

  // Run counters, all starting at zero. The key order is the order in which
  // writeStats() prints them.
  $stats = array_fill_keys(
      [
          'Total vcards',
          'No FN property',
          'Ignored duplicates',
          'Merged values',
          'Error',
          'Unique cards',
          'Total written',
      ],
      0
  );
  /**
   * Prints every counter from the global $stats table, one per line, and then
   * moves the terminal cursor back up by the same number of lines so that the
   * next call overwrites the previous output.
   */
  function writeStats()
  {
      global $stats;

      $table = '';
      foreach ($stats as $label => $count) {
          // Label left-aligned in 23 columns, value right-aligned in 6.
          $table .= sprintf("%-23s%6s\n", $label, $count);
      }

      // ANSI "cursor up": one line per counter that was just printed.
      echo $table, "\033[", count($stats), 'A';
  }
  /**
   * Appends one vcard to the global output stream and counts it under
   * 'Total written'.
   *
   * @param object $vcard object whose serialize() result is written, followed
   *                      by a single newline
   */
  function write($vcard)
  {
      global $stats, $output;

      ++$stats['Total written'];

      $serialized = $vcard->serialize();
      fwrite($output, $serialized."\n");
  }
  // Main pass: read the input one vcard at a time. The first vcard seen for a
  // given FN is kept in $collectedNames; every later vcard with the same FN is
  // compared against it property by property.
  while ($vcard = $splitter->getNext()) {
      ++$stats['Total vcards'];
      writeStats();

      $fn = isset($vcard->FN) ? (string) $vcard->FN : null;

      // Compare explicitly with null and '' instead of using empty(): a
      // contact whose FN is the string "0" does have a name and must take
      // part in duplicate detection.
      if (null === $fn || '' === $fn) {
          // Immediately write this vcard, we don't compare it.
          ++$stats['No FN property'];
          ++$stats['Unique cards'];
          write($vcard);
          $vcard->destroy();
          continue;
      }

      if (!isset($collectedNames[$fn])) {
          // First vcard with this name: keep it as the reference copy.
          $collectedNames[$fn] = $vcard;
          ++$stats['Unique cards'];
          continue;
      }

      $earlier = $collectedNames[$fn];

      // We only check that every property of the current vcard also appears
      // in the earlier vcard, not the other way around.
      foreach ($vcard->children() as $newProp) {
          if (in_array($newProp->name, $ignoredProperties, true)) {
              // We don't care about properties such as UID and REV.
              continue;
          }

          // First attempt: an exact match on the serialized property.
          $newSerialized = $newProp->serialize();
          $found = false;
          foreach ($earlier->select($newProp->name) as $compareProp) {
              if ($compareProp->serialize() === $newSerialized) {
                  $found = true;
                  break;
              }
          }
          if ($found) {
              continue;
          }

          if ('EMAIL' === $newProp->name || 'TEL' === $newProp->name) {
              // Second attempt for EMAIL and TEL: compare by value only.
              foreach ($earlier->select($newProp->name) as $compareProp) {
                  if ($compareProp->getValue() === $newProp->getValue()) {
                      $found = true;
                      break;
                  }
              }
              if (!$found) {
                  // Unknown address or number: merge it into the earlier vcard.
                  $earlier->add(clone $newProp);
                  ++$stats['Merged values'];
              }
              continue;
          }

          // Any other property that is missing from the earlier vcard counts
          // as an error. The current vcard is dropped and we move on to the
          // next one from the input.
          ++$stats['Error'];
          if ($debug) {
              fwrite($debug, "Missing '".$newProp->name."' property in duplicate. Earlier vcard:\n".$earlier->serialize()."\n\nLater:\n".$vcard->serialize()."\n\n");
          }
          $vcard->destroy();
          continue 2;
      }

      // Every property was matched or merged: the vcard is a plain duplicate.
      $vcard->destroy();
      ++$stats['Ignored duplicates'];
  }
  // Flush the collected vcards to the output. Each one gets this tool's
  // PRODID, replacing whatever PRODID it carried before.
  $prodId = '-//Sabre//Sabre VObject '.Version::VERSION.'//EN';
  foreach ($collectedNames as $mergedCard) {
      $mergedCard->PRODID = $prodId;
      write($mergedCard);
      writeStats();
  }

  // writeStats() leaves the cursor at the top of the table, so step past the
  // table before printing the closing message.
  echo str_repeat("\n", count($stats))."\nDone.\n";