// This tutorial shows a technique for scraping QQ Zone (Qzone) articles so that
// they can be synchronised into a forum.
//
// NOTE(review): GetBlogId and CreatePost below are shown without their enclosing
// class, exactly as in the original listing. They must live inside a class that
// also provides DownloadImages and CreateSyncHistory, which CreatePost calls.

/// <summary>
/// Entity describing a user's request to have their own QQ Zone articles
/// synchronised into a particular forum board.
/// </summary>
public class QQZone
{
    private int id;
    private string qq;
    private string username;
    private Guid userId;
    private int forumId;

    /// <summary>Identifier of this sync request.</summary>
    public int Id
    {
        get { return id; }
        set { id = value; }
    }

    /// <summary>QQ number whose articles are fetched.</summary>
    public string QQ
    {
        get { return qq; }
        set { qq = value; }
    }

    /// <summary>User name associated with the request.</summary>
    public string Username
    {
        get { return username; }
        set { username = value; }
    }

    /// <summary>Identifier of the user the imported posts are created for.</summary>
    public Guid UserId
    {
        get { return userId; }
        set { userId = value; }
    }

    /// <summary>Identifier of the forum that receives the imported posts.</summary>
    public int ForumId
    {
        get { return forumId; }
        set { forumId = value; }
    }

    /// <summary>Creates a fully populated sync request.</summary>
    /// <param name="id">Identifier of this sync request.</param>
    /// <param name="qq">QQ number whose articles are fetched.</param>
    /// <param name="username">User name associated with the request.</param>
    /// <param name="userId">Identifier of the user the posts are created for.</param>
    /// <param name="forumId">Identifier of the target forum.</param>
    public QQZone(int id, string qq, string username, Guid userId, int forumId)
    {
        this.id = id;
        this.qq = qq;
        this.username = username;
        this.userId = userId;
        this.forumId = forumId;
    }
}

/// <summary>
/// An article fetched from QQ Zone.
/// </summary>
public class QQZoneBlog
{
    private string subject = string.Empty;
    private string body = string.Empty;
    private string tag = string.Empty;
    private List<string> replayContent = new List<string>();

    /// <summary>Title of the article.</summary>
    public string Subject
    {
        get { return subject; }
        set { subject = value; }
    }

    /// <summary>Body of the article.</summary>
    public string Body
    {
        get { return body; }
        set { body = value; }
    }

    /// <summary>Tag text of the article.</summary>
    public string Tag
    {
        get { return tag; }
        set { tag = value; }
    }

    /// <summary>
    /// Bodies of the replies to the article. The repliers' names are not
    /// captured, so the replies are posted as an anonymous user.
    /// </summary>
    public List<string> ReplayContent
    {
        get { return replayContent; }
        set { replayContent = value; }
    }
}

// The code below retrieves the article ids for a given QQ number.

/// <summary>
/// Gets the article ids published under a QQ number.
/// </summary>
/// <param name="qq">QQ number to query.</param>
/// <returns>
/// The text found between each <c>{"blogid":</c> marker and the next comma,
/// within the first <c>[{ ... }]</c> section of the response. Empty when the
/// response has no such section.
/// </returns>
public static List<string> GetBlogId(string qq)
{
    const string blogIdMarker = "{\"blogid\":";

    string url = string.Format(@"http://u.cnc.qzone.qq.com/cgi-bin/cgi_qqzone_static.cgi?uin={0}&flag=1426064064&property=GoRE&vuin=0&t=1196748211", qq);
    List<string> result = new List<string>();

    // The response is scanned as one string, so the lines are concatenated
    // without separators (as the original listing did).
    StringBuilder sb = new StringBuilder();
    foreach (string line in DownloadLines(url))
    {
        sb.Append(line);
    }
    string response = sb.ToString();

    int start = response.IndexOf("[{", StringComparison.Ordinal);
    if (start == -1)
    {
        return result;
    }

    // Search for the terminator after the opener; a "}]" that precedes "[{"
    // would otherwise yield a negative range.
    int end = response.IndexOf("}]", start, StringComparison.Ordinal);
    if (end == -1)
    {
        return result;
    }

    // Walk every object in the array and take the value that follows its
    // "blogid" key, up to the next comma. Scanning for the marker avoids the
    // Replace/Split literals that were lost from the published listing.
    int cursor = response.IndexOf(blogIdMarker, start + 1, StringComparison.Ordinal);
    while (cursor != -1 && cursor < end)
    {
        int valueStart = cursor + blogIdMarker.Length;
        int comma = response.IndexOf(',', valueStart);
        if (comma == -1 || comma > end)
        {
            break;
        }

        result.Add(response.Substring(valueStart, comma - valueStart));
        cursor = response.IndexOf(blogIdMarker, comma, StringComparison.Ordinal);
    }

    return result;
}

// With a blogId in hand, the article itself can be fetched.

/// <summary>
/// Fetches one QQ Zone article and its replies and inserts them into the forum
/// configured on <paramref name="zone"/>, then records the sync.
/// </summary>
/// <param name="zone">Sync request supplying the QQ number, forum and user.</param>
/// <param name="blogId">Id of the article to fetch.</param>
public static void CreatePost(QQZone zone, string blogId)
{
    // Fetch the raw content.
    string urlFormat = "http://b.cnc.qzone.qq.com/cgi-bin/blognew/blog_get_data?uin={0}&numperpage=15&blogid={1}&arch=0&pos=0&direct=1&r=0.339366103963674";
    List<string> content = DownloadLines(string.Format(urlFormat, zone.QQ, blogId));

    // Extract the needed information. Each field is expected on its own line,
    // starting with its quoted key.
    QQZoneBlog blog = new QQZoneBlog();
    foreach (string s in content)
    {
        string value;

        if (TryReadField(s, "category", out value))
        {
            blog.Tag = "会员QQ空间," + value;
        }

        if (TryReadField(s, "title", out value))
        {
            blog.Subject = value;
        }

        if (TryReadField(s, "content", out value))
        {
            // NOTE(review): the " " literal is reproduced from the listing; it
            // may originally have been an escaped newline that the blog engine
            // garbled - confirm against real response data.
            blog.Body = value.Replace(" ", "<br/>");
            blog.Body = blog.Body + string.Format("<br/><br/><p>[url=http://user.qzone.qq.com/{0}/blog/{1}]查看原文[/url]</p>", zone.QQ, blogId);
        }

        if (TryReadField(s, "replycontent", out value))
        {
            // NOTE(review): same " " literal caveat as for the article body.
            blog.ReplayContent.Add(value.Replace(" ", "<br/>"));
        }
    }

    // Articles whose title contains "[No]" are skipped entirely.
    if (blog.Subject.Contains("[No]"))
    {
        return;
    }

    // Insert the post.
    ForumPost post = new ForumPost();
    post.ForumId = zone.ForumId;
    post.Subject = blog.Subject;
    post.Body = blog.Body;
    post.TagsText = blog.Tag;
    post.PostType = PostType.HTML;
    ForumPost newPost = ForumPostManager.AddPost(post, UserManager.GetUser(zone.UserId));
    DownloadImages(newPost);

    if (blog.ReplayContent.Count > 0)
    {
        User user = UserManager.GetAnonymousUser(true);
        foreach (string body in blog.ReplayContent)
        {
            // A new ForumPost is built for each reply so that nothing carried
            // over from a previous AddPost call can leak into the next one.
            ForumPost replaypost = new ForumPost();
            replaypost.ForumId = zone.ForumId;
            replaypost.Subject = "Re:" + blog.Subject;
            replaypost.ParentId = newPost.PostId;
            replaypost.PostType = PostType.HTML;
            replaypost.Body = body;
            ForumPost newPostReplay = ForumPostManager.AddPost(replaypost, user);

            // QQ blocks image hot-linking, so the images are downloaded and
            // stored as attachments of the post.
            DownloadImages(newPostReplay);
        }
    }

    CreateSyncHistory(zone.QQ, blogId);
}

/// <summary>
/// Downloads <paramref name="url"/> and returns its content line by line,
/// decoded as gb2312.
/// </summary>
/// <param name="url">Address to read.</param>
/// <returns>The lines of the response, without line terminators.</returns>
private static List<string> DownloadLines(string url)
{
    List<string> lines = new List<string>();

    using (System.Net.WebClient client = new System.Net.WebClient())
    using (Stream stream = client.OpenRead(url))
    using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding("gb2312")))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            lines.Add(line);
        }
    }

    return lines;
}

/// <summary>
/// Reads the value of a <c>"key":"value",</c> line.
/// </summary>
/// <param name="line">Response line to inspect.</param>
/// <param name="key">Unquoted field name, e.g. <c>title</c>.</param>
/// <param name="value">
/// The line with the <c>"key":"</c> opener removed and one trailing run of
/// commas, then of double quotes, trimmed; <c>null</c> when the line does not
/// start with <c>"key":</c>.
/// </param>
/// <returns><c>true</c> when the line starts with <c>"key":</c>.</returns>
private static bool TryReadField(string line, string key, out string value)
{
    string prefix = "\"" + key + "\":";
    if (!line.StartsWith(prefix, StringComparison.Ordinal))
    {
        value = null;
        return false;
    }

    value = line.Replace(prefix + "\"", "").TrimEnd(',').TrimEnd('"');
    return true;
}