PHPBB3.1 下论坛的 站点地图生成器


最近我把我的英文论坛 https://codingforspeed.com/forum/ 升级到了 PHPBB3。PHPBB3 似乎没有内置的站点地图生成功能, 不过不要紧, 用 PHP 写个小脚本就可以根据论坛的帖子生成站点地图:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
// Standalone sitemap generator for a phpBB 3.1 board.
//   sitemap.php         -> sitemap index with one entry per forum
//   sitemap.php?fid=N   -> URL set for forum N (forum pages and topic pages)
require_once('conn.php'); // expected to define DB_HOST, DB_USER and DB_PASSWORD

// The legacy mysql_* API was removed in PHP 7.0, so mysqli is used instead.
$db = mysqli_connect(DB_HOST, DB_USER, DB_PASSWORD, 'forum');
if (!$db) {
    die(mysqli_connect_error());
}

$domain_root = 'https://codingforspeed.com/forum/';
header('Content-Type: text/xml; charset=utf-8');

// The integer cast makes $fid safe to embed directly in the SQL below.
$fid = isset($_GET['fid']) ? (int)$_GET['fid'] : -1;

define("POSTS_TABLE", "phpbb_posts");
define("TOPICS_TABLE", "phpbb_topics");

if ($fid > 0) {
    // Board-wide page sizes. The literals are only fallbacks for missing or
    // non-positive config rows.
    // NOTE(review): assumes the standard phpbb_config table (config_name,
    // config_value) with the same "phpbb_" prefix as the other tables.
    $page_size = array('topics_per_page' => 25, 'posts_per_page' => 10);
    $sql = "SELECT config_name, config_value
            FROM `phpbb_config`
            WHERE config_name IN ('topics_per_page', 'posts_per_page')";
    $result = mysqli_query($db, $sql) or die(mysqli_error($db));
    while ($cfg = mysqli_fetch_assoc($result)) {
        if ((int)$cfg['config_value'] > 0) {
            $page_size[$cfg['config_name']] = (int)$cfg['config_value'];
        }
    }

    $sql = 'SELECT forum_topics_approved, forum_topics_per_page
            FROM `phpbb_forums`
            WHERE forum_id = ' . $fid;
    $result = mysqli_query($db, $sql) or die(mysqli_error($db));

    // Fetch by column name: the fields below are read by name, which a
    // numerically indexed row would not support.
    $row = mysqli_fetch_assoc($result);
    if (!$row) {
        // Unknown forum id: do not advertise URLs for a forum that does not exist.
        header('HTTP/1.1 404 Not Found');
        exit;
    }

    // A non-positive per-forum value falls back to the board-wide setting;
    // this also prevents a zero step in the paging loop below.
    $topics_per_page = (int)$row['forum_topics_per_page'];
    if ($topics_per_page <= 0) {
        $topics_per_page = $page_size['topics_per_page'];
    }
    $posts_per_page = $page_size['posts_per_page'];
    $topic_count = (int)$row['forum_topics_approved'];

    echo "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
    // NOTE(review): this is the legacy Google 0.84 namespace, while the sitemap
    // index branch below uses sitemaps.org 0.9 - consider aligning the two.
    echo "<urlset xmlns=\"http://www.google.com/schemas/sitemap/0.84\">\n";

    // First page of the forum.
    $forum_url = $domain_root . 'viewforum.php?f=' . $fid;
    echo "<url>\n";
    echo '   <loc>' . htmlspecialchars($forum_url, ENT_QUOTES, 'UTF-8') . "</loc>\n";
    echo "   <changefreq>hourly</changefreq>\n";
    echo "</url>\n";

    // Further forum pages: one entry per additional page of topics.
    for ($start = $topics_per_page; $start < $topic_count; $start += $topics_per_page) {
        // htmlspecialchars() turns "&" into "&amp;", which XML requires inside <loc>.
        $page_url = $forum_url . '&start=' . $start;
        echo "<url>\n";
        echo '   <loc>' . htmlspecialchars($page_url, ENT_QUOTES, 'UTF-8') . "</loc>\n";
        echo "   <changefreq>hourly</changefreq>\n";
        echo "</url>\n";
    }

    // Every topic of the forum together with the time of its last post.
    $sql = 'SELECT t.topic_id, t.topic_posts_approved, p.post_time
            FROM `' . TOPICS_TABLE . '` AS t
            INNER JOIN `' . POSTS_TABLE . '` AS p
                ON p.post_id = t.topic_last_post_id
            WHERE t.forum_id = ' . $fid . '
            ORDER BY t.topic_type DESC, t.topic_last_post_id DESC';
    $result = mysqli_query($db, $sql) or die(mysqli_error($db));

    while ($data = mysqli_fetch_assoc($result)) {
        $topic_url = $domain_root . 'viewtopic.php?f=' . $fid . '&t=' . (int)$data['topic_id'];
        $lastmod = date('Y-m-d', (int)$data['post_time']);

        // First page of the topic.
        echo "<url>\n";
        echo '   <loc>' . htmlspecialchars($topic_url, ENT_QUOTES, 'UTF-8') . "</loc>\n";
        echo '   <lastmod>' . $lastmod . "</lastmod>\n";
        echo "</url>\n";

        // Topics spanning more than one page: paging is driven by the topic's
        // post count and the posts-per-page setting.
        $post_count = (int)$data['topic_posts_approved'];
        for ($start = $posts_per_page; $start < $post_count; $start += $posts_per_page) {
            $page_url = $topic_url . '&start=' . $start;
            echo "<url>\n";
            echo '   <loc>' . htmlspecialchars($page_url, ENT_QUOTES, 'UTF-8') . "</loc>\n";
            echo '   <lastmod>' . $lastmod . "</lastmod>\n";
            echo "</url>\n";
        }
    }
    echo '</urlset>';
} else { // Sitemap index for the whole board: one child sitemap per forum.
    echo '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
    echo '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
    $sql = 'SELECT forum_id FROM `phpbb_forums`';
    $result = mysqli_query($db, $sql) or die(mysqli_error($db));
    while ($row = mysqli_fetch_assoc($result)) {
        $sitemap_url = $domain_root . 'sitemap.php?fid=' . (int)$row['forum_id'];
        echo '<sitemap>' . "\n";
        echo '<loc>' . htmlspecialchars($sitemap_url, ENT_QUOTES, 'UTF-8') . '</loc>' . "\n";
        echo '</sitemap>' . "\n";
    }
    echo '</sitemapindex>';
}
// Standalone sitemap generator for a phpBB 3.1 board.
//   sitemap.php         -> sitemap index with one entry per forum
//   sitemap.php?fid=N   -> URL set for forum N (forum pages and topic pages)
require_once('conn.php'); // expected to define DB_HOST, DB_USER and DB_PASSWORD

// The legacy mysql_* API was removed in PHP 7.0, so mysqli is used instead.
$db = mysqli_connect(DB_HOST, DB_USER, DB_PASSWORD, 'forum');
if (!$db) {
    die(mysqli_connect_error());
}

$domain_root = 'https://codingforspeed.com/forum/';
header('Content-Type: text/xml; charset=utf-8');

// The integer cast makes $fid safe to embed directly in the SQL below.
$fid = isset($_GET['fid']) ? (int)$_GET['fid'] : -1;

define("POSTS_TABLE", "phpbb_posts");
define("TOPICS_TABLE", "phpbb_topics");

if ($fid > 0) {
    // Board-wide page sizes. The literals are only fallbacks for missing or
    // non-positive config rows.
    // NOTE(review): assumes the standard phpbb_config table (config_name,
    // config_value) with the same "phpbb_" prefix as the other tables.
    $page_size = array('topics_per_page' => 25, 'posts_per_page' => 10);
    $sql = "SELECT config_name, config_value
            FROM `phpbb_config`
            WHERE config_name IN ('topics_per_page', 'posts_per_page')";
    $result = mysqli_query($db, $sql) or die(mysqli_error($db));
    while ($cfg = mysqli_fetch_assoc($result)) {
        if ((int)$cfg['config_value'] > 0) {
            $page_size[$cfg['config_name']] = (int)$cfg['config_value'];
        }
    }

    $sql = 'SELECT forum_topics_approved, forum_topics_per_page
            FROM `phpbb_forums`
            WHERE forum_id = ' . $fid;
    $result = mysqli_query($db, $sql) or die(mysqli_error($db));

    // Fetch by column name: the fields below are read by name, which a
    // numerically indexed row would not support.
    $row = mysqli_fetch_assoc($result);
    if (!$row) {
        // Unknown forum id: do not advertise URLs for a forum that does not exist.
        header('HTTP/1.1 404 Not Found');
        exit;
    }

    // A non-positive per-forum value falls back to the board-wide setting;
    // this also prevents a zero step in the paging loop below.
    $topics_per_page = (int)$row['forum_topics_per_page'];
    if ($topics_per_page <= 0) {
        $topics_per_page = $page_size['topics_per_page'];
    }
    $posts_per_page = $page_size['posts_per_page'];
    $topic_count = (int)$row['forum_topics_approved'];

    echo "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
    // NOTE(review): this is the legacy Google 0.84 namespace, while the sitemap
    // index branch below uses sitemaps.org 0.9 - consider aligning the two.
    echo "<urlset xmlns=\"http://www.google.com/schemas/sitemap/0.84\">\n";

    // First page of the forum.
    $forum_url = $domain_root . 'viewforum.php?f=' . $fid;
    echo "<url>\n";
    echo '   <loc>' . htmlspecialchars($forum_url, ENT_QUOTES, 'UTF-8') . "</loc>\n";
    echo "   <changefreq>hourly</changefreq>\n";
    echo "</url>\n";

    // Further forum pages: one entry per additional page of topics.
    for ($start = $topics_per_page; $start < $topic_count; $start += $topics_per_page) {
        // htmlspecialchars() turns "&" into "&amp;", which XML requires inside <loc>.
        $page_url = $forum_url . '&start=' . $start;
        echo "<url>\n";
        echo '   <loc>' . htmlspecialchars($page_url, ENT_QUOTES, 'UTF-8') . "</loc>\n";
        echo "   <changefreq>hourly</changefreq>\n";
        echo "</url>\n";
    }

    // Every topic of the forum together with the time of its last post.
    $sql = 'SELECT t.topic_id, t.topic_posts_approved, p.post_time
            FROM `' . TOPICS_TABLE . '` AS t
            INNER JOIN `' . POSTS_TABLE . '` AS p
                ON p.post_id = t.topic_last_post_id
            WHERE t.forum_id = ' . $fid . '
            ORDER BY t.topic_type DESC, t.topic_last_post_id DESC';
    $result = mysqli_query($db, $sql) or die(mysqli_error($db));

    while ($data = mysqli_fetch_assoc($result)) {
        $topic_url = $domain_root . 'viewtopic.php?f=' . $fid . '&t=' . (int)$data['topic_id'];
        $lastmod = date('Y-m-d', (int)$data['post_time']);

        // First page of the topic.
        echo "<url>\n";
        echo '   <loc>' . htmlspecialchars($topic_url, ENT_QUOTES, 'UTF-8') . "</loc>\n";
        echo '   <lastmod>' . $lastmod . "</lastmod>\n";
        echo "</url>\n";

        // Topics spanning more than one page: paging is driven by the topic's
        // post count and the posts-per-page setting.
        $post_count = (int)$data['topic_posts_approved'];
        for ($start = $posts_per_page; $start < $post_count; $start += $posts_per_page) {
            $page_url = $topic_url . '&start=' . $start;
            echo "<url>\n";
            echo '   <loc>' . htmlspecialchars($page_url, ENT_QUOTES, 'UTF-8') . "</loc>\n";
            echo '   <lastmod>' . $lastmod . "</lastmod>\n";
            echo "</url>\n";
        }
    }
    echo '</urlset>';
} else { // Sitemap index for the whole board: one child sitemap per forum.
    echo '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
    echo '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
    $sql = 'SELECT forum_id FROM `phpbb_forums`';
    $result = mysqli_query($db, $sql) or die(mysqli_error($db));
    while ($row = mysqli_fetch_assoc($result)) {
        $sitemap_url = $domain_root . 'sitemap.php?fid=' . (int)$row['forum_id'];
        echo '<sitemap>' . "\n";
        echo '<loc>' . htmlspecialchars($sitemap_url, ENT_QUOTES, 'UTF-8') . '</loc>' . "\n";
        echo '</sitemap>' . "\n";
    }
    echo '</sitemapindex>';
}

把上面的PHP代码保存成 sitemap.php 然后在浏览器测试 大概会得到这样的结果:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<sitemap>
<loc>https://codingforspeed.com/forum/sitemap.php?fid=1</loc>
</sitemap>
<sitemap>
<loc>https://codingforspeed.com/forum/sitemap.php?fid=4</loc>
</sitemap>
<sitemap>
<loc>https://codingforspeed.com/forum/sitemap.php?fid=3</loc>
</sitemap>
<sitemap>
<loc>https://codingforspeed.com/forum/sitemap.php?fid=6</loc>
</sitemap>
<sitemap>
<loc>https://codingforspeed.com/forum/sitemap.php?fid=5</loc>
</sitemap>
<sitemap>
<loc>https://codingforspeed.com/forum/sitemap.php?fid=7</loc>
</sitemap>
</sitemapindex>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<sitemap>
<loc>https://codingforspeed.com/forum/sitemap.php?fid=1</loc>
</sitemap>
<sitemap>
<loc>https://codingforspeed.com/forum/sitemap.php?fid=4</loc>
</sitemap>
<sitemap>
<loc>https://codingforspeed.com/forum/sitemap.php?fid=3</loc>
</sitemap>
<sitemap>
<loc>https://codingforspeed.com/forum/sitemap.php?fid=6</loc>
</sitemap>
<sitemap>
<loc>https://codingforspeed.com/forum/sitemap.php?fid=5</loc>
</sitemap>
<sitemap>
<loc>https://codingforspeed.com/forum/sitemap.php?fid=7</loc>
</sitemap>
</sitemapindex>

网络爬虫可以轻易的读懂并跟踪里面的链接 比如

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">
<url>
<loc>https://codingforspeed.com/forum/viewforum.php?f=4</loc>
<changefreq>hourly</changefreq>
</url>
<url>
<loc>
https://codingforspeed.com/forum/viewtopic.php?f=4&t=59
</loc>
<lastmod>2015-06-28</lastmod>
</url>
<url>
<loc>
https://codingforspeed.com/forum/viewtopic.php?f=4&t=44
</loc>
<lastmod>2015-06-28</lastmod>
</url>
<url>
<loc>
https://codingforspeed.com/forum/viewtopic.php?f=4&t=53
</loc>
<lastmod>2015-06-28</lastmod>
</url>
<url>
<loc>
https://codingforspeed.com/forum/viewtopic.php?f=4&t=52
</loc>
<lastmod>2014-03-20</lastmod>
</url>
<url>
<loc>
https://codingforspeed.com/forum/viewtopic.php?f=4&t=51
</loc>
<lastmod>2014-03-20</lastmod>
</url>
<url>
<loc>
https://codingforspeed.com/forum/viewtopic.php?f=4&t=47
</loc>
<lastmod>2014-01-27</lastmod>
</url>
</urlset>
<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">
<url>
<loc>https://codingforspeed.com/forum/viewforum.php?f=4</loc>
<changefreq>hourly</changefreq>
</url>
<url>
<loc>
https://codingforspeed.com/forum/viewtopic.php?f=4&t=59
</loc>
<lastmod>2015-06-28</lastmod>
</url>
<url>
<loc>
https://codingforspeed.com/forum/viewtopic.php?f=4&t=44
</loc>
<lastmod>2015-06-28</lastmod>
</url>
<url>
<loc>
https://codingforspeed.com/forum/viewtopic.php?f=4&t=53
</loc>
<lastmod>2015-06-28</lastmod>
</url>
<url>
<loc>
https://codingforspeed.com/forum/viewtopic.php?f=4&t=52
</loc>
<lastmod>2014-03-20</lastmod>
</url>
<url>
<loc>
https://codingforspeed.com/forum/viewtopic.php?f=4&t=51
</loc>
<lastmod>2014-03-20</lastmod>
</url>
<url>
<loc>
https://codingforspeed.com/forum/viewtopic.php?f=4&t=47
</loc>
<lastmod>2014-01-27</lastmod>
</url>
</urlset>

接下来在 robots.txt 里指明站点地图的路径, S 要大写, 必须写全URL

Sitemap: https://codingforspeed.com/forum/sitemap.php

需要注意的是, 站点地图必须对不同的 USER AGENT 都产生一样的输出, 因为网络爬虫很有可能会被 PHPBB3 挡在门外.

在GOOGLE WEBMASTER 里把站点地图提交 然后就可以跟踪索引情况.

google-webmaster-sitemap PHPBB3.1 下论坛的 站点地图生成器 互联网 技术 折腾 网站信息与统计

google-webmaster-sitemap

还需要检查网络爬虫是否能顺利索引论坛, 因为这点默认是被 PHPBB3 禁用的.

英文: https://helloacm.com/creating-sitemap-generator-for-phpbb3-1-using-php/

GD Star Rating
loading...
本文一共 249 个汉字, 你数一下对不对.
PHPBB3.1 下论坛的 站点地图生成器. (AMP 移动加速版本)
上一篇: 穷举算法的应用 - 去除EXCEL文件中的保护
下一篇: 在英国多久到超市买一次菜合适?

扫描二维码,分享本文到微信朋友圈
a6a68208c43f9b27cacd9979c4f9e267 PHPBB3.1 下论坛的 站点地图生成器 互联网 技术 折腾 网站信息与统计

一条回应

评论