澳门新浦京娱乐场网站-www.146.net-新浦京娱乐场官网
做最好的网站

澳门新浦京娱乐场网站:脏字过滤算法,爬虫学

        Internet上的一些站点常常存在着镜像网站(mirror),即两个网站的内容一样但网页对应的域名不同。这样会导致对同一份网页爬虫重复抓取多次。为了避免这种情况,对于每一份抓取到的网页,它首先需要进入ContentSeen模块。该模块会判断网页的内容是否和已下载过的某个网页的内容一致,如果一致,则该网页不会再被送去进行下一步的处理。这样的做法能够显著地降低爬虫需要下载的网页数。至于如何判断两个网页的内容是否一致,一般的思路是这样的:并不会去直接比较两个网页的内容,而是将网页的内容经过计算生成FingerPrint(指纹),通常FingerPrint是一个固定长度的字符串,要比网页的正文短很多。如果两个网页的FingerPrint一样,则认为它们内容完全相同。

        为了完成这一模块,首先我们需要一个强大的指纹算法,将我们的网页内容计算成指纹存入数据库,下次在保存前直接通过指纹的对比就可以完成去重复操作。

        首先来看一下著名的Google公司使用的网页去重复算法SimHash吧:

        Google的Moses Charikar发表的一篇论文“detecting near-duplicates for web crawling”中提出了simhash算法,专门用来解决亿万级别的网页的去重任务。

        SimHash作为locality sensitive hash(局部敏感哈希)的一种:

        其主要思想是降维,将高维的特征向量映射成低维的特征向量,通过两个向量的Hamming Distance来确定文章是否重复或者高度近似。

        其中,Hamming Distance,又称汉明距离,在信息论中,两个等长字符串之间的汉明距离是两个字符串对应位置的不同字符的个数。也就是说,它就是将一个字符串变换成另外一个字符串所需要替换的字符个数。例如:1011101 与 1001001 之间的汉明距离是 2。至于我们常说的字符串编辑距离则是一般形式的汉明距离。

        如此,通过比较两个文档的SimHash值的汉明距离,可以获取它们的相似度。

        详细情形能够看这里SimHash算法

____________________________________________________________________________________________________________

下面我们来进行代码实现:

using System;
using System.Collections.Generic;
using System.Linq;


namespace Crawler.Common
{
    /// <summary>
    /// Computes 32-bit SimHash fingerprints of text and compares them by Hamming distance.
    /// </summary>
    /// <remarks>
    /// Tokens are hashed with a deterministic FNV-1a hash rather than
    /// <see cref="string.GetHashCode()"/>, whose value is not guaranteed to be stable
    /// across processes or runtime versions. Fingerprints can therefore be persisted
    /// and compared later; their numeric values differ from ones produced with
    /// <c>GetHashCode</c>.
    /// </remarks>
    public class SimHashAnalyser
    {
        /// <summary>Number of bits in a fingerprint.</summary>
        private const int HashSize = 32;

        /// <summary>
        /// Fingerprints both strings and returns their likeness in the range 0..1.
        /// </summary>
        /// <param name="needle">First text.</param>
        /// <param name="haystack">Second text.</param>
        /// <param name="type">Tokeniser used to split both texts.</param>
        /// <returns>Fraction of fingerprint bits that are equal (1 = identical fingerprints).</returns>
        public static float GetLikenessValue(string needle, string haystack, TokeniserType type = TokeniserType.Overlapping)
        {
            var needleSimHash = GetSimHash(needle, type);
            var hayStackSimHash = GetSimHash(haystack, type);
            return GetLikenessValue(needleSimHash, hayStackSimHash);
        }

        /// <summary>
        /// Returns the fraction of bits on which two fingerprints agree.
        /// </summary>
        public static float GetLikenessValue(int needleSimHash, int hayStackSimHash)
        {
            return (HashSize - GetHammingDistance(needleSimHash, hayStackSimHash)) / (float)HashSize;
        }

        /// <summary>Hashes every token to a 32-bit value.</summary>
        private static IEnumerable<int> DoHashTokens(IEnumerable<string> tokens)
        {
            return tokens.Select(GetStableHash).ToList();
        }

        /// <summary>
        /// 32-bit FNV-1a over the UTF-16 code units of <paramref name="token"/>
        /// (low byte first). Deterministic across processes and platforms.
        /// </summary>
        private static int GetStableHash(string token)
        {
            unchecked
            {
                const uint offsetBasis = 2166136261;
                const uint prime = 16777619;
                uint hash = offsetBasis;
                foreach (char c in token)
                {
                    hash = (hash ^ (uint)(c & 0xFF)) * prime;
                    hash = (hash ^ (uint)(c >> 8)) * prime;
                }
                return (int)hash;
            }
        }

        /// <summary>Counts the bit positions at which the two values differ.</summary>
        private static int GetHammingDistance(int firstValue, int secondValue)
        {
            var hammingBits = firstValue ^ secondValue;
            var hammingValue = 0;
            for (var i = 0; i < HashSize; i++)
            {
                if (IsBitSet(hammingBits, i))
                {
                    hammingValue += 1;
                }
            }
            return hammingValue;
        }

        /// <summary>True when bit <paramref name="pos"/> (0 = least significant) of <paramref name="b"/> is 1.</summary>
        private static bool IsBitSet(int b, int pos)
        {
            return (b & (1 << pos)) != 0;
        }

        /// <summary>Fingerprints <paramref name="input"/> using the overlapping tokeniser.</summary>
        public static int GetSimHash(string input)
        {
            return GetSimHash(input, TokeniserType.Overlapping);
        }

        /// <summary>
        /// Computes the SimHash fingerprint of <paramref name="input"/>.
        /// </summary>
        /// <param name="input">Text to fingerprint.</param>
        /// <param name="tokeniserType">How the text is split into tokens.</param>
        /// <returns>The fingerprint; 0 when the text produces no tokens.</returns>
        public static int GetSimHash(string input, TokeniserType tokeniserType)
        {
            ITokeniser tokeniser;
            if (tokeniserType == TokeniserType.Overlapping)
            {
                tokeniser = new OverlappingStringTokeniser();
            }
            else
            {
                tokeniser = new FixedSizeStringTokeniser();
            }

            // Each token votes +1 / -1 on every bit position.
            var vector = new int[HashSize];
            foreach (var value in DoHashTokens(tokeniser.Tokenise(input)))
            {
                for (var j = 0; j < HashSize; j++)
                {
                    vector[j] += IsBitSet(value, j) ? 1 : -1;
                }
            }

            // A bit of the fingerprint is set when the positive votes win.
            var fingerprint = 0;
            for (var i = 0; i < HashSize; i++)
            {
                if (vector[i] > 0)
                {
                    fingerprint |= 1 << i;
                }
            }
            return fingerprint;
        }
    }

    /// <summary>Splits a string into tokens.</summary>
    public interface ITokeniser
    {
        /// <summary>Returns the tokens of <paramref name="input"/> in order.</summary>
        IEnumerable<string> Tokenise(string input);
    }

    /// <summary>Splits text into consecutive, non-overlapping chunks of a fixed size.</summary>
    public class FixedSizeStringTokeniser : ITokeniser
    {
        private readonly ushort _tokensize;

        /// <param name="tokenSize">Chunk length; must be between 2 and 127 inclusive.</param>
        /// <exception cref="ArgumentException">The size is outside 2..127.</exception>
        public FixedSizeStringTokeniser(ushort tokenSize = 5)
        {
            if (tokenSize < 2) throw new ArgumentException("Token 不能够超出范围");
            if (tokenSize > 127) throw new ArgumentException("Token 不可能越过范围");
            _tokensize = tokenSize;
        }

        /// <summary>
        /// Returns consecutive chunks of the configured size; the last chunk may be shorter.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        public IEnumerable<string> Tokenise(string input)
        {
            if (input == null) throw new ArgumentNullException("input");

            var chunks = new List<string>();
            for (var offset = 0; offset < input.Length; offset += _tokensize)
            {
                chunks.Add(input.Substring(offset, Math.Min(_tokensize, input.Length - offset)));
            }
            return chunks;
        }
    }

    /// <summary>Splits text into fixed-size chunks where consecutive chunks share characters.</summary>
    public class OverlappingStringTokeniser : ITokeniser
    {
        private readonly ushort _chunkSize;
        private readonly ushort _overlapSize;

        /// <param name="chunkSize">Length of each chunk.</param>
        /// <param name="overlapSize">Characters shared by consecutive chunks; must be smaller than <paramref name="chunkSize"/>.</param>
        /// <exception cref="ArgumentException"><paramref name="chunkSize"/> is not greater than <paramref name="overlapSize"/>.</exception>
        public OverlappingStringTokeniser(ushort chunkSize = 4, ushort overlapSize = 3)
        {
            if (chunkSize <= overlapSize) throw new ArgumentException("Chunck 必须高于 overlap");
            _overlapSize = overlapSize;
            _chunkSize = chunkSize;
        }

        /// <summary>
        /// Returns every full-size chunk, advancing by (chunkSize - overlapSize) each step.
        /// A non-empty input shorter than one chunk is returned as a single token so that
        /// short texts do not all collapse to the same empty token list.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        public IEnumerable<string> Tokenise(string input)
        {
            if (input == null) throw new ArgumentNullException("input");

            var result = new List<string>();
            if (input.Length == 0)
            {
                return result;
            }
            if (input.Length < _chunkSize)
            {
                result.Add(input);
                return result;
            }

            var step = _chunkSize - _overlapSize;
            // "<=" so the chunk that ends exactly at the end of the input is included.
            for (var position = 0; position <= input.Length - _chunkSize; position += step)
            {
                result.Add(input.Substring(position, _chunkSize));
            }
            return result;
        }
    }

    /// <summary>Selects the tokeniser used when fingerprinting.</summary>
    public enum TokeniserType
    {
        Overlapping,
        FixedSize
    }
}

 

调用方法如下:

// Compare two sentences that differ by a single word and print their likeness.
const string first = "the cat sat on the mat.";
const string second = "the cat sat on a mat.";

float likeness = SimHashAnalyser.GetLikenessValue(first, second);
float percentage = likeness * 100;

Console.Clear();
Console.WriteLine("相似度: {0}%", percentage);
Console.ReadKey();

 

输出为:

相似度: 78.125%

接下来就是对ContentSeen模块的简单封装:

using Crawler.Common;


namespace Crawler.Processing
{
    /// <summary>
    /// Helpers for the crawler's content-seen check: a fetched page is reduced to a
    /// fingerprint, and fingerprints are compared to decide whether the page content
    /// matches a page that was already downloaded.
    /// </summary>
    public class ContentSeen
    {
        /// <summary>Returns the SimHash fingerprint of the page markup.</summary>
        /// <param name="html">Page content to fingerprint.</param>
        public static int GetFingerPrint(string html)
        {
            return SimHashAnalyser.GetSimHash(html);
        }

        /// <summary>Returns the likeness (0..1) of two fingerprints.</summary>
        public static float Similarity(int print1, int print2)
        {
            return SimHashAnalyser.GetLikenessValue(print1, print2);
        }
    }
}

 

/// <summary>
 /// Serialises a list as <c>{"jsonName":[{...},{...}]}</c>, one JSON object per item
 /// built from the public properties of <typeparamref name="T"/>.
 /// </summary>
 /// <typeparam name="T">Element type whose public properties are written.</typeparam>
 /// <param name="list">Items to serialise; may be empty.</param>
 /// <param name="jsonName">
 /// Name of the wrapping array. When null or empty, the runtime type name of the first
 /// item is used, or the name of <typeparamref name="T"/> if there is no first item.
 /// </param>
 /// <returns>The JSON text.</returns>
 /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
 public static string ListToJson<T>(IList<T> list, string jsonName)
 {
     if (list == null) throw new ArgumentNullException("list");

     if (string.IsNullOrEmpty(jsonName))
     {
         // list[0] would throw on an empty list, so fall back to the declared type.
         jsonName = (list.Count > 0 && list[0] != null) ? list[0].GetType().Name : typeof(T).Name;
     }

     // Reflect once; no instance of T has to be created for this.
     PropertyInfo[] properties = typeof(T).GetProperties();

     StringBuilder json = new StringBuilder();
     json.Append("{\"").Append(jsonName).Append("\":[");
     for (int i = 0; i < list.Count; i++)
     {
         if (i > 0)
         {
             json.Append(",");
         }
         json.Append("{");
         for (int j = 0; j < properties.Length; j++)
         {
             if (j > 0)
             {
                 json.Append(",");
             }
             object value = properties[j].GetValue(list[i], null);
             // A null value has no runtime type; use the declared property type instead.
             Type type = value == null ? properties[j].PropertyType : value.GetType();
             string text = value == null ? string.Empty : value.ToString();
             json.Append("\"").Append(properties[j].Name).Append("\":").Append(StringFormat(text, type));
         }
         json.Append("}");
     }
     json.Append("]}");
     return json.ToString();
 }
 
 /// <summary>  
 /// List转成json  
 /// </summary>  
 /// <typeparam name="T"></typeparam>  
 /// <param name="list"></param>  
 /// <returns></returns>  
 public static string ListToJson<T>(IList<T> list) 
 { 
     object obj = list[0]; 
     return ListToJson<T>(list, obj.GetType().Name); 
 } 
 
 /// <summary>  
 /// 对象调换为Json字符串  
 /// </summary>  
 /// <param name="jsonObject">对象</param>  
 /// <returns>Json字符串</returns>  
 public static string ToJson(object jsonObject) 
 { 
     try 
     { 
         StringBuilder jsonString = new StringBuilder(); 
         jsonString.Append("{"); 
         PropertyInfo[] propertyInfo = jsonObject.GetType().GetProperties(); 
         for (int i = 0; i < propertyInfo.Length; i ) 
         { 
             object objectValue = propertyInfo[i].GetGetMethod().Invoke(jsonObject, null); 
             if (objectValue == null) 
             { 
                 continue; 
             } 
             StringBuilder value = new StringBuilder(); 
             if (objectValue is DateTime || objectValue is Guid || objectValue is TimeSpan) 
             { 
                 value.Append(""" objectValue.ToString() """); 
             } 
             else if (objectValue is string) 
             { 
                 value.Append(""" objectValue.ToString() """); 
             } 
             else if (objectValue is IEnumerable) 
             { 
                 value.Append(ToJson((IEnumerable)objectValue)); 
             } 
             else 
             { 
                 value.Append(""" objectValue.ToString() """); 
             } 
             jsonString.Append(""" propertyInfo[i].Name "":" value ","); ; 
         } 
         return jsonString.ToString().TrimEnd(',') "}"; 
     } 
     catch (Exception ex) 
     { 
         throw ex; 
     } 
 } 
 
 /// <summary>  
 /// 对象集结账和转账换Json  
 /// </summary>  
 /// <param name="array">集结对象</param>  
 /// <returns>Json字符串</returns>  
 public static string ToJson(IEnumerable array) 
 { 
     string jsonString = "["; 
     foreach (object item in array) 
     { 
         jsonString = ToJson(item) ","; 
     } 
     jsonString.Remove(jsonString.Length - 1, jsonString.Length); 
     return jsonString "]"; 
 } 
 
 /// <summary>  
 /// 普通集合调换Json  
 /// </summary>  
 /// <param name="array">集合对象</param>  
 /// <returns>Json字符串</returns>  
 public static string ToArrayString(IEnumerable array) 
 { 
     string jsonString = "["; 
     foreach (object item in array) 
     { 
         jsonString = ToJson(item.ToString()) ","; 
     } 
     jsonString.Remove(jsonString.Length - 1, jsonString.Length); 
     return jsonString "]"; 
 } 
 
 /// <summary>  
 /// Datatable转换为Json  
 /// </summary>  
 /// <param name="table">Datatable对象</param>  
 /// <returns>Json字符串</returns>  
 public static string ToJson(DataTable dt) 
 { 
     StringBuilder jsonString = new StringBuilder(); 
     jsonString.Append("["); 
     DataRowCollection drc = dt.Rows; 
     for (int i = 0; i < drc.Count; i ) 
     { 
         jsonString.Append("{"); 
         for (int j = 0; j < dt.Columns.Count; j ) 
         { 
             string strKey = dt.Columns[j].ColumnName; 
             string strValue = drc[i][j].ToString(); 
             Type type = dt.Columns[j].DataType; 
             jsonString.Append(""" strKey "":"); 
             strValue = StringFormat(strValue, type); 
             if (j < dt.Columns.Count - 1) 
             { 
                 jsonString.Append(strValue ","); 
             } 
             else 
             { 
                 jsonString.Append(strValue); 
             } 
         } 
         jsonString.Append("},"); 
     } 
     jsonString.Remove(jsonString.Length - 1, 1); 
     jsonString.Append("]"); 
     return jsonString.ToString(); 
 } 
 
 /// <summary>  
 /// DataTable转成Json  
 /// </summary>  
 /// <param name="jsonName"></param>  
 /// <param name="dt"></param>  
 /// <returns></returns>  
 public static string ToJson(DataTable dt, string jsonName) 
 { 
     StringBuilder Json = new StringBuilder(); 
     if (string.IsNullOrEmpty(jsonName)) 
         jsonName = dt.TableName; 
     Json.Append("{"" jsonName "":["); 
     if (dt.Rows.Count > 0) 
     { 
         for (int i = 0; i < dt.Rows.Count; i ) 
         { 
             Json.Append("{"); 
             for (int j = 0; j < dt.Columns.Count; j ) 
             { 
                 Type type = dt.Rows[i][j].GetType(); 
                 Json.Append(""" dt.Columns[j].ColumnName.ToString() "":" StringFormat(dt.Rows[i][j].ToString(), type)); 
                 if (j < dt.Columns.Count - 1) 
                 { 
                     Json.Append(","); 
                 } 
             } 
             Json.Append("}"); 
             if (i < dt.Rows.Count - 1) 
             { 
                 Json.Append(","); 
             } 
         } 
     } 
     Json.Append("]}"); 
     return Json.ToString(); 
 } 
 
 /// <summary>  
 /// DataReader转换为Json  
 /// </summary>  
 /// <param name="dataReader">DataReader对象</param>  
 /// <returns>Json字符串</returns>  
 public static string ToJson(IDataReader dataReader) 
 { 
     StringBuilder jsonString = new StringBuilder(); 
     jsonString.Append("["); 
 
     while (dataReader.Read()) 
     { 
         jsonString.Append("{"); 
         for (int i = 0; i < dataReader.FieldCount; i ) 
         { 
             Type type = dataReader.GetFieldType(i); 
             string strKey = dataReader.GetName(i); 
             string strValue = dataReader[i].ToString(); 
             jsonString.Append(""" strKey "":"); 
             strValue = StringFormat(strValue, type); 
             if (i < dataReader.FieldCount - 1) 
             { 
                 jsonString.Append(strValue ","); 
             } 
             else 
             { 
                 jsonString.Append(strValue); 
             } 
         } 
         jsonString.Append("},"); 
     } 
     dataReader.Close(); 
     jsonString.Remove(jsonString.Length - 1, 1); 
     jsonString.Append("]"); 
     if (jsonString.Length == 1) 
     { 
         return "[]"; 
     } 
     return jsonString.ToString(); 
 } 
 
 /// <summary>  
 /// DataSet转换为Json  
 /// </summary>  
 /// <param name="dataSet">DataSet对象</param>  
 /// <returns>Json字符串</returns>  
 public static string ToJson(DataSet dataSet) 
 { 
     string jsonString = "{"; 
     foreach (DataTable table in dataSet.Tables) 
     { 
         jsonString = """ table.TableName "":" ToJson(table)

原文

C#下RSA算法的实现(适用于支付宝和易宝支付)

 

 

目录(?)[-]

  1. RSA算法代码
  2. RSA算法测试代码

 

  • ","; 
         } 
         jsonString = jsonString.TrimEnd(','); 
         return jsonString "}"; 
     } 
     
     /// <summary>  
     /// 过滤特殊字符  
     /// </summary>  
     /// <param name="s"></param>  
     /// <returns></returns>  
     private static string String2Json(String s) 
     { 
         StringBuilder sb = new StringBuilder(); 
         for (int i = 0; i < s.Length; i ) 
         { 
             char c = s.ToCharArray()[i]; 
             switch (c) 
             { 
                 case '"': 
                     sb.Append("\""); break; 
                 case '\': 
                     sb.Append("\\"); break; 
                 case '/': 
                     sb.Append("\/"); break; 
                 case 'b': 
                     sb.Append("\b"); break; 
                 case 'f': 
                     sb.Append("\f"); break; 
                 case 'n': 
                     sb.Append("\n"); break; 
                 case 'r': 
                     sb.Append("\r"); break; 
                 case 't': 
                     sb.Append("\t"); break; 
                 default: 
                     sb.Append(c); break; 
             } 
         } 
         return sb.ToString(); 
     } 
     
     /// <summary>  
     /// 格式化字符型、日期型、布尔型  
     /// </summary>  
     /// <param name="str"></param>  
     /// <param name="type"></param>  
     /// <returns></returns>  
     private static string StringFormat(string str, Type type) 
     { 
         if (type != typeof(string) && string.IsNullOrEmpty(str)) 
         { 
             str = """ str """; 
         } 
         else if (type == typeof(string)) 
         { 
             str = String2Json(str); 
             str = """ str """; 
         } 
         else if (type == typeof(DateTime)) 
         { 
             str = """ str.Split(' ')[0] """; 
         } 
         else if (type == typeof(bool)) 
         { 
             str = str.ToLower(); 
         } 
     
         return str; 
     } 

程序包下载Word.rar

RSA算法代码:

 

[csharp] view plain copy

 

  1. using System;  
  2. using System.Collections.Generic;  
  3. using System.Text;  
  4. using System.IO;  
  5. using System.Security.Cryptography;  
  6.   
  7. namespace RSA.Class  
  8. {  
  9.     /// <summary>  
  10.     /// 类名:RSAFromPkcs8  
  11.     /// 功能:RSA加密、解密、签名、验签  
  12.     /// 详细:该类对Java生成的密钥进行解密和签字以及验签专项使用类,无需修改  
  13.     /// 版本:3.0  
  14.     /// 日期:2013-07-08  
  15.     /// 说明:  
  16.     /// 以下代码只是为了方便厂商测试而提供的样例代码,厂商能够依据本身网址的需求,根据本领文书档案编写,并非必然要动用该代码。  
  17.     /// </summary>  
  18.     public sealed class RSAFromPkcs8  
  19.     {  
  /// <summary>
  /// Signs text with RSA/SHA-1 and returns the signature as Base64.
  /// </summary>
  /// <param name="content">Text to sign.</param>
  /// <param name="privateKey">Base64-encoded PKCS#8 private key.</param>
  /// <param name="input_charset">Name of the encoding used to turn the text into bytes.</param>
  /// <returns>Base64-encoded signature.</returns>
  /// <exception cref="ArgumentException">The private key could not be parsed.</exception>
  public static string sign(string content, string privateKey, string input_charset)
  {
      byte[] Data = Encoding.GetEncoding(input_charset).GetBytes(content);
      using (RSACryptoServiceProvider rsa = DecodePemPrivateKey(privateKey))
      {
          if (rsa == null)
          {
              throw new ArgumentException("Private key could not be parsed.", "privateKey");
          }
          using (SHA1 sh = new SHA1CryptoServiceProvider())
          {
              byte[] signData = rsa.SignData(Data, sh);
              return Convert.ToBase64String(signData);
          }
      }
  }
  35.   
  /// <summary>
  /// Verifies an RSA/SHA-1 signature over text.
  /// </summary>
  /// <param name="content">Text that was signed.</param>
  /// <param name="signedString">Base64-encoded signature.</param>
  /// <param name="publicKey">Public key, in the form accepted by ConvertFromPublicKey.</param>
  /// <param name="input_charset">Name of the encoding used to turn the text into bytes.</param>
  /// <returns>true when the signature matches, otherwise false.</returns>
  public static bool verify(string content, string signedString, string publicKey, string input_charset)
  {
      byte[] Data = Encoding.GetEncoding(input_charset).GetBytes(content);
      byte[] signature = Convert.FromBase64String(signedString);
      RSAParameters paraPub = ConvertFromPublicKey(publicKey);
      using (RSACryptoServiceProvider rsaPub = new RSACryptoServiceProvider())
      using (SHA1 sh = new SHA1CryptoServiceProvider())
      {
          rsaPub.ImportParameters(paraPub);
          return rsaPub.VerifyData(Data, sh, signature);
      }
  }
  56.   
  /// <summary>
  /// Encrypts text with the public key and returns the ciphertext as Base64.
  /// </summary>
  /// <param name="resData">Text to encrypt.</param>
  /// <param name="publicKey">Base64-encoded public key.</param>
  /// <param name="input_charset">Name of the encoding used to turn the text into bytes.</param>
  /// <returns>Base64-encoded ciphertext.</returns>
  public static string encryptData(string resData, string publicKey, string input_charset)
  {
      // Encode with the caller's charset (as decrypt does when decoding); plain ASCII
      // silently replaced every non-ASCII character with '?'.
      byte[] DataToEncrypt = Encoding.GetEncoding(input_charset).GetBytes(resData);
      return encrypt(DataToEncrypt, publicKey, input_charset);
  }
  70.   
  71.   
  /// <summary>
  /// Decrypts Base64 ciphertext made of consecutive 128-byte RSA blocks and
  /// concatenates the decoded text of each block.
  /// </summary>
  /// <param name="resData">Base64-encoded ciphertext.</param>
  /// <param name="privateKey">Base64-encoded PKCS#8 private key.</param>
  /// <param name="input_charset">Name of the encoding used to decode the plaintext bytes.</param>
  /// <returns>The decrypted text.</returns>
  public static string decryptData(string resData, string privateKey, string input_charset)
  {
      // One ciphertext block per RSA operation; 128 bytes is the block size of a 1024-bit key.
      const int BlockSize = 128;

      byte[] DataToDecrypt = Convert.FromBase64String(resData);
      StringBuilder result = new StringBuilder();
      // Integer division: bytes after the last complete block are not processed.
      for (int j = 0; j < DataToDecrypt.Length / BlockSize; j++)
      {
          byte[] buf = new byte[BlockSize];
          Buffer.BlockCopy(DataToDecrypt, j * BlockSize, buf, 0, BlockSize);
          result.Append(decrypt(buf, privateKey, input_charset));
      }
      return result.ToString();
  }
  95.  
  96.         #region 内部方法  
  97.   
  /// <summary>
  /// Encrypts bytes with the public key using PKCS#1 v1.5 padding and returns Base64.
  /// </summary>
  /// <param name="data">Plaintext bytes.</param>
  /// <param name="publicKey">Base64-encoded public key.</param>
  /// <param name="input_charset">Not used by this method.</param>
  private static string encrypt(byte[] data, string publicKey, string input_charset)
  {
      RSACryptoServiceProvider rsa = DecodePemPublicKey(publicKey);
      // fOAEP = false selects PKCS#1 v1.5 padding.
      byte[] result = rsa.Encrypt(data, false);
      return Convert.ToBase64String(result);
  }
  106.   
  /// <summary>
  /// Decrypts one RSA block (PKCS#1 v1.5 padding) and decodes the bytes as text.
  /// </summary>
  /// <param name="data">Ciphertext block.</param>
  /// <param name="privateKey">Base64-encoded PKCS#8 private key.</param>
  /// <param name="input_charset">Name of the encoding used to decode the plaintext bytes.</param>
  private static string decrypt(byte[] data, string privateKey, string input_charset)
  {
      RSACryptoServiceProvider rsa = DecodePemPrivateKey(privateKey);
      // fOAEP = false selects PKCS#1 v1.5 padding.
      byte[] source = rsa.Decrypt(data, false);
      return Encoding.GetEncoding(input_charset).GetString(source);
  }
  119.   
  /// <summary>
  /// Decodes a Base64 public key and builds an RSA provider from it.
  /// </summary>
  /// <param name="pemstr">Base64 text of the key (no PEM header/footer lines).</param>
  /// <returns>The provider, or null when DecodeRSAPublicKey rejects the key bytes.</returns>
  private static RSACryptoServiceProvider DecodePemPublicKey(String pemstr)
  {
      // FromBase64String throws on bad input and never returns null,
      // so no null check is needed on the decoded bytes.
      byte[] pkcs8publickkey = Convert.FromBase64String(pemstr);
      return DecodeRSAPublicKey(pkcs8publickkey);
  }
  132.           
  /// <summary>
  /// Decodes a Base64 PKCS#8 private key and builds an RSA provider from it.
  /// </summary>
  /// <param name="pemstr">Base64 text of the key (no PEM header/footer lines).</param>
  /// <returns>The provider, or null when DecodePrivateKeyInfo rejects the key bytes.</returns>
  private static RSACryptoServiceProvider DecodePemPrivateKey(String pemstr)
  {
      // FromBase64String throws on bad input and never returns null,
      // so no null check is needed on the decoded bytes.
      byte[] pkcs8privatekey = Convert.FromBase64String(pemstr);
      return DecodePrivateKeyInfo(pkcs8privatekey);
  }
  145.   
  /// <summary>
  /// Unwraps a PKCS#8 PrivateKeyInfo blob: checks the outer sequence, the version
  /// integer and the rsaEncryption algorithm identifier, then hands the remaining
  /// bytes to DecodeRSAPrivateKey.
  /// </summary>
  /// <param name="pkcs8">DER-encoded key bytes.</param>
  /// <returns>The RSA provider, or null when the bytes do not match the expected layout.</returns>
  private static RSACryptoServiceProvider DecodePrivateKeyInfo(byte[] pkcs8)
  {
      // Encoded AlgorithmIdentifier for rsaEncryption (OID 1.2.840.113549.1.1.1, NULL parameters).
      byte[] SeqOID = { 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x05, 0x00 };

      MemoryStream mem = new MemoryStream(pkcs8);
      int lenstream = (int)mem.Length;
      BinaryReader binr = new BinaryReader(mem);

      try
      {
          // ReadUInt16 is little-endian, so the bytes 30 81 read back as 0x8130.
          ushort twobytes = binr.ReadUInt16();
          if (twobytes == 0x8130)
              binr.ReadByte();     // one length byte follows
          else if (twobytes == 0x8230)
              binr.ReadInt16();    // two length bytes follow
          else
              return null;

          if (binr.ReadByte() != 0x02)      // expect an Integer tag
              return null;

          if (binr.ReadUInt16() != 0x0001)  // bytes 01 00: length 1, value 0
              return null;

          byte[] seq = binr.ReadBytes(15);
          if (!CompareBytearrays(seq, SeqOID))
              return null;

          if (binr.ReadByte() != 0x04)      // expect an Octet String
              return null;

          // 0x81 / 0x82 announce one / two further length bytes; otherwise this byte is the length.
          byte bt = binr.ReadByte();
          if (bt == 0x81)
              binr.ReadByte();
          else if (bt == 0x82)
              binr.ReadUInt16();

          // Whatever remains is passed on as the RSA private key structure.
          byte[] rsaprivkey = binr.ReadBytes((int)(lenstream - mem.Position));
          return DecodeRSAPrivateKey(rsaprivkey);
      }
      catch (Exception)
      {
          // Any read failure (e.g. truncated input) is reported as "not a key".
          return null;
      }
      finally
      {
          binr.Close();
      }
  }
  205.   
  /// <summary>
  /// Returns true when both arrays have the same length and the same bytes.
  /// </summary>
  private static bool CompareBytearrays(byte[] a, byte[] b)
  {
      if (a.Length != b.Length)
          return false;

      for (int i = 0; i < a.Length; i++)
      {
          if (a[i] != b[i])
              return false;
      }
      return true;
  }
  219.   
  /// <summary>
  /// Parses a DER-encoded SubjectPublicKeyInfo blob and builds an RSA provider
  /// from the modulus and exponent it contains.
  /// </summary>
  /// <param name="publickey">DER-encoded key bytes.</param>
  /// <returns>The RSA provider, or null when the bytes do not match the expected layout.</returns>
  private static RSACryptoServiceProvider DecodeRSAPublicKey(byte[] publickey)
  {
      // Encoded AlgorithmIdentifier for PKCS #1 rsaEncryption, OID 1.2.840.113549.1.1.1.
      byte[] SeqOID = { 0x30, 0x0D, 0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x05, 0x00 };

      MemoryStream mem = new MemoryStream(publickey);
      BinaryReader binr = new BinaryReader(mem);

      try
      {
          // ReadUInt16 is little-endian, so the bytes 30 81 read back as 0x8130.
          ushort twobytes = binr.ReadUInt16();
          if (twobytes == 0x8130)
              binr.ReadByte();    // one length byte follows
          else if (twobytes == 0x8230)
              binr.ReadInt16();   // two length bytes follow
          else
              return null;

          byte[] seq = binr.ReadBytes(15);
          if (!CompareBytearrays(seq, SeqOID))
              return null;

          twobytes = binr.ReadUInt16();
          if (twobytes == 0x8103)         // Bit String tag 03 with 1-byte length
              binr.ReadByte();
          else if (twobytes == 0x8203)    // Bit String tag 03 with 2-byte length
              binr.ReadInt16();
          else
              return null;

          if (binr.ReadByte() != 0x00)    // expect a zero byte next
              return null;

          twobytes = binr.ReadUInt16();
          if (twobytes == 0x8130)         // inner Sequence
              binr.ReadByte();
          else if (twobytes == 0x8230)
              binr.ReadInt16();
          else
              return null;

          // Integer tag 02 followed by the modulus length (big-endian in the key).
          twobytes = binr.ReadUInt16();
          byte lowbyte = 0x00;
          byte highbyte = 0x00;
          if (twobytes == 0x8102)
              lowbyte = binr.ReadByte();
          else if (twobytes == 0x8202)
          {
              highbyte = binr.ReadByte();
              lowbyte = binr.ReadByte();
          }
          else
              return null;
          int modsize = (highbyte << 8) | lowbyte;

          // Peek at the first modulus byte: a leading zero is not part of the modulus.
          byte firstbyte = binr.ReadByte();
          binr.BaseStream.Seek(-1, SeekOrigin.Current);
          if (firstbyte == 0x00)
          {
              binr.ReadByte();
              modsize -= 1;
          }

          byte[] modulus = binr.ReadBytes(modsize);

          if (binr.ReadByte() != 0x02)    // expect an Integer for the exponent
              return null;
          int expbytes = (int)binr.ReadByte();    // single-byte length
          byte[] exponent = binr.ReadBytes(expbytes);

          RSACryptoServiceProvider RSA = new RSACryptoServiceProvider();
          RSAParameters RSAKeyInfo = new RSAParameters();
          RSAKeyInfo.Modulus = modulus;
          RSAKeyInfo.Exponent = exponent;
          RSA.ImportParameters(RSAKeyInfo);
          return RSA;
      }
      catch (Exception)
      {
          // Any read or import failure is reported as "not a key".
          return null;
      }
      finally
      {
          binr.Close();
      }
  }
  314.   
  /// <summary>
  /// Parses a DER-encoded RSA private key structure (version followed by eight
  /// integers) and builds an RSA provider from it.
  /// </summary>
  /// <param name="privkey">DER-encoded key bytes.</param>
  /// <returns>The RSA provider, or null when the bytes do not match the expected layout.</returns>
  private static RSACryptoServiceProvider DecodeRSAPrivateKey(byte[] privkey)
  {
      MemoryStream mem = new MemoryStream(privkey);
      BinaryReader binr = new BinaryReader(mem);

      try
      {
          // ReadUInt16 is little-endian, so the bytes 30 81 read back as 0x8130.
          ushort twobytes = binr.ReadUInt16();
          if (twobytes == 0x8130)
              binr.ReadByte();     // one length byte follows
          else if (twobytes == 0x8230)
              binr.ReadInt16();    // two length bytes follow
          else
              return null;

          if (binr.ReadUInt16() != 0x0102)    // bytes 02 01: Integer of length 1 (version)
              return null;
          if (binr.ReadByte() != 0x00)        // version value must be 0
              return null;

          // The key components follow as consecutive Integers, in this fixed order.
          byte[] MODULUS = binr.ReadBytes(GetIntegerSize(binr));
          byte[] E = binr.ReadBytes(GetIntegerSize(binr));
          byte[] D = binr.ReadBytes(GetIntegerSize(binr));
          byte[] P = binr.ReadBytes(GetIntegerSize(binr));
          byte[] Q = binr.ReadBytes(GetIntegerSize(binr));
          byte[] DP = binr.ReadBytes(GetIntegerSize(binr));
          byte[] DQ = binr.ReadBytes(GetIntegerSize(binr));
          byte[] IQ = binr.ReadBytes(GetIntegerSize(binr));

          RSACryptoServiceProvider RSA = new RSACryptoServiceProvider();
          RSAParameters RSAparams = new RSAParameters();
          RSAparams.Modulus = MODULUS;
          RSAparams.Exponent = E;
          RSAparams.D = D;
          RSAparams.P = P;
          RSAparams.Q = Q;
          RSAparams.DP = DP;
          RSAparams.DQ = DQ;
          RSAparams.InverseQ = IQ;
          RSA.ImportParameters(RSAparams);
          return RSA;
      }
      catch (Exception)
      {
          // Any read or import failure is reported as "not a key".
          return null;
      }
      finally
      {
          binr.Close();
      }
  }
  388.   
  /// <summary>
  /// Reads the tag and length header of an ASN.1 INTEGER from <paramref name="binr"/>,
  /// skips any leading 0x00 content bytes, and leaves the reader positioned on the
  /// first remaining content byte.
  /// </summary>
  /// <param name="binr">Reader positioned on the INTEGER tag byte.</param>
  /// <returns>
  /// The number of content bytes left to read after leading zeros were skipped,
  /// or 0 when the next byte is not the INTEGER tag (0x02).
  /// </returns>
  private static int GetIntegerSize(BinaryReader binr)
  {
      // Tag byte: anything other than 0x02 is not an INTEGER.
      if (binr.ReadByte() != 0x02)
      {
          return 0;
      }

      int count;
      byte bt = binr.ReadByte();
      if (bt == 0x81)
      {
          // Length is carried in the next single byte.
          count = binr.ReadByte();
      }
      else if (bt == 0x82)
      {
          // Length is carried in the next two bytes, high byte first.
          byte highbyte = binr.ReadByte();
          byte lowbyte = binr.ReadByte();
          count = (highbyte << 8) | lowbyte;
      }
      else
      {
          // Short form: the byte just read is the length itself.
          count = bt;
      }

      // Skip high-order zero bytes, but never look beyond this integer's own
      // content: an unbounded loop would consume the following element (or hit
      // end of stream) when the value consists solely of zeros.
      while (count > 0)
      {
          if (binr.ReadByte() != 0x00)
          {
              // The byte just read is real data, so step back onto it.
              binr.BaseStream.Seek(-1, SeekOrigin.Current);
              break;
          }
          count -= 1;
      }
      return count;
  }
  424.  
  425.         #endregion  
  426.  
  427.         #region 解析.net 生成的Pem  
  /// <summary>
  /// Extracts the modulus and public exponent from a base64-encoded public key by
  /// copying them from fixed byte offsets.
  /// </summary>
  /// <param name="pemFileConent">Base64 key body (no PEM header/footer lines).</param>
  /// <returns>Parameters with only Modulus and Exponent populated.</returns>
  /// <exception cref="ArgumentException">The decoded data is shorter than 162 bytes.</exception>
  /// <remarks>
  /// The offsets are hard-coded for a 128-byte modulus and 3-byte exponent; keys with
  /// other sizes are not handled. NOTE(review): presumably this matches the 1024-bit
  /// "openssl rsa -pubout" output used elsewhere on this page; confirm before reuse.
  /// </remarks>
  private static RSAParameters ConvertFromPublicKey(string pemFileConent)
  {
      const int MinimumLength = 162;
      const int ModulusOffset = 29;
      const int ModulusLength = 128;
      const int ExponentOffset = 159;
      const int ExponentLength = 3;

      byte[] keyData = Convert.FromBase64String(pemFileConent);
      if (keyData.Length < MinimumLength)
      {
          throw new ArgumentException("pem file content is incorrect.");
      }

      byte[] pemModulus = new byte[ModulusLength];
      byte[] pemPublicExponent = new byte[ExponentLength];
      Array.Copy(keyData, ModulusOffset, pemModulus, 0, ModulusLength);
      Array.Copy(keyData, ExponentOffset, pemPublicExponent, 0, ExponentLength);

      RSAParameters para = new RSAParameters();
      para.Modulus = pemModulus;
      para.Exponent = pemPublicExponent;
      return para;
  }
  445.   
  /// <summary>
  /// Extracts all RSA private-key components from a base64-encoded key by walking
  /// fixed-size fields: 128-byte modulus and private exponent, 3-byte public
  /// exponent, and five 64-byte CRT values.
  /// </summary>
  /// <param name="pemFileConent">Base64 key body (no PEM header/footer lines).</param>
  /// <returns>Fully populated RSA parameters.</returns>
  /// <exception cref="ArgumentException">
  /// The decoded data is shorter than 609 bytes, or a field would run past its end.
  /// </exception>
  private static RSAParameters ConvertFromPrivateKey(string pemFileConent)
  {
      byte[] keyData = Convert.FromBase64String(pemFileConent);
      if (keyData.Length < 609)
      {
          throw new ArgumentException("pem file content is incorrect.");
      }

      int index = 11;
      byte[] pemModulus = CopyKeyPart(keyData, ref index, 128);

      index += 2; // 141
      byte[] pemPublicExponent = CopyKeyPart(keyData, ref index, 3);

      index += 4; // 148
      byte[] pemPrivateExponent = CopyKeyPart(keyData, ref index, 128);

      index += KeyPartHeaderLength(keyData, index); // 279
      byte[] pemPrime1 = CopyKeyPart(keyData, ref index, 64);

      index += KeyPartHeaderLength(keyData, index); // 346
      byte[] pemPrime2 = CopyKeyPart(keyData, ref index, 64);

      index += KeyPartHeaderLength(keyData, index); // 412/413
      byte[] pemExponent1 = CopyKeyPart(keyData, ref index, 64);

      index += KeyPartHeaderLength(keyData, index); // 479/480
      byte[] pemExponent2 = CopyKeyPart(keyData, ref index, 64);

      index += KeyPartHeaderLength(keyData, index); // 545/546
      byte[] pemCoefficient = CopyKeyPart(keyData, ref index, 64);

      RSAParameters para = new RSAParameters();
      para.Modulus = pemModulus;
      para.Exponent = pemPublicExponent;
      para.D = pemPrivateExponent;
      para.P = pemPrime1;
      para.Q = pemPrime2;
      para.DP = pemExponent1;
      para.DQ = pemExponent2;
      para.InverseQ = pemCoefficient;
      return para;
  }

  /// <summary>
  /// Returns how many header bytes precede a 64-byte field: 2 when the byte after
  /// <paramref name="index"/> equals 64 (the length byte), otherwise 3.
  /// </summary>
  private static int KeyPartHeaderLength(byte[] keyData, int index)
  {
      if (index + 1 >= keyData.Length)
      {
          throw new ArgumentException("pem file content is incorrect.");
      }
      return keyData[index + 1] == 64 ? 2 : 3;
  }

  /// <summary>
  /// Copies <paramref name="length"/> bytes starting at <paramref name="index"/> and
  /// advances <paramref name="index"/> past them.
  /// </summary>
  private static byte[] CopyKeyPart(byte[] keyData, ref int index, int length)
  {
      // The 609-byte minimum does not cover the case where every header is
      // 3 bytes long, so each copy is checked individually.
      if (index + length > keyData.Length)
      {
          throw new ArgumentException("pem file content is incorrect.");
      }
      byte[] part = new byte[length];
      Array.Copy(keyData, index, part, 0, length);
      index += length;
      return part;
  }
  504.         #endregion  
  505.   
  506.     }  
  507. }  

 

 

   /// <summary>
    /// List转成json
    /// </summary>
    /// <typeparam name="T"></typeparam>
    /// <param name="jsonName"></param>
    /// <param name="list"></param>
    /// <returns></returns>
    public static string ListToJson<T>(IList<T> list, string jsonName)
    {
        StringBuilder Json = new StringBuilder();
        if (string.IsNullOrEmpty(jsonName))
            jsonName = list[0].GetType().Name;
        Json.Append("{"" jsonName "":[");
        if (list.Count > 0)
        {
            for (int i = 0; i < list.Count; i )
            {
                T obj = Activator.CreateInstance<T>();
                PropertyInfo[] pi = obj.GetType().GetProperties();
                Json.Append("{");
                for (int j = 0; j < pi.Length; j )
                {
                    Type type = pi[j].GetValue(list[i], null).GetType();
                    Json.Append(""" pi[j].Name.ToString() "":"

修改后

RSA算法测试代码:

 

[csharp] view plain copy

 

  using System;
  using System.Collections.Generic;
  using System.Text;
  using RSA.Class;

  namespace payapi_demo.RSA
  {
      /// <summary>
      /// Console demo that encrypts, decrypts and signs sample strings through RSAFromPkcs8.
      /// </summary>
      class TestRSA
      {
          static void Main(string[] arg)
          {

              /* RSA encryption test. The key pair was generated with OpenSSL:
               * 1. Generate the RSA private key:
               *    openssl genrsa -out rsa_private_key.pem 1024
               * 2. Generate the RSA public key:
               *    openssl rsa -in rsa_private_key.pem -pubout -out rsa_public_key.pem
               *
               * 3. Convert the RSA private key to PKCS8 format:
               *    openssl pkcs8 -topk8 -inform PEM -in rsa_private_key.pem -outform PEM -nocrypt -out rsa_pub_pk8.pem
               *
               * Open rsa_private_key.pem and rsa_pub_pk8.pem to obtain the key contents; when
               * joining them into a single string, remove the line breaks.
               */

              string publickey = "MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDzOqfNunFxFtCZPlq7fO/jWwjqmTvAooVBB4y87BizSZ9dl/F7FpAxYc6MmX2TqivCvvORXgdlYdFWAhzXOnIUv9OGG///WPLe9TMs9kIwAZ/APUXauvC01oFLnYkzwPlAh0tQ1Au9arTE/OG1V1dKgf8BXHLPhKL4BmGBEUZBtQIDAQAB";
              // Base64 '+' characters restored; the page extraction had turned them into spaces.
              string privatekey = "MIICeQIBADANBgkqhkiG9w0BAQEFAASCAmMwggJfAgEAAoGBAPM6p826cXEW0Jk+Wrt87+NbCOqZO8CihUEHjLzsGLNJn12X8XsWkDFhzoyZfZOqK8K+85FeB2Vh0VYCHNc6chS/04Yb//9Y8t71Myz2QjABn8A9Rdq68LTWgUudiTPA+UCHS1DUC71qtMT84bVXV0qB/wFccs+EovgGYYERRkG1AgMBAAECgYEA2PmnPdgnYKnolfvQ9tXiLaBFGPpvGk4grz0r6FB5TF7N4rErwxECunq0xioaowK4HPc40qHd2SvkkWQ7FCjYIDsnMk1oOhxNKn0J3FG0n5Cg1/dFai4eoXHs/nKn3SVZ8YZC1T2cMtN2srectLqNqhB8aQEe8xmykyUlUpg/qmECQQD9vkwjUotG5oUUrOj6etcB4WcdyyH0FtThKgyoJUDwgBv6lGGzWyFJEREvp47IgV+FgC7zeP2mL4MhgnD3tNCZAkEA9WRrjOLBNc379XZpoDsH7rZjobVvhnTrEuRDx/whqZ+vk64EPrEW81XYh647bAbJlFn2jPhY+IUHkrxFEFT/fQJBAMoLNOULXQtfkqgb5odMONeue0Ul8itB4tBHgzyALW1TFPQ6InGGJsLfbCfd67uMCFts7fXAaXhibK/KBdm3iEECQQChwVAjzlUN4nnzk9qMhFz2PcPvFGovd2J9UXpcmRaXeWuDLXIe4Rz/ydaxmWgSDWdTIvoicpIzP31+fBwKZ/0BAkEAy0bh4weKmYF29//rK0sxmY8RtqkQeFrwWbqx1daa1w0DfWlNSvy47zyW1G5/AdZU6JSpXxlxdlM/HSDw+v7kcA==";

              // String to encrypt
              string data = "yibao";

              Console.WriteLine("加密前字符串内容:" + data);
              // Encrypt, then decrypt the result again
              string encrypteddata = RSAFromPkcs8.encryptData(data, publickey, "UTF-8");
              Console.WriteLine("加密后的字符串为:" + encrypteddata);
              Console.WriteLine("解密后的字符串内容:" + RSAFromPkcs8.decryptData(encrypteddata, privatekey, "UTF-8"));

              Console.WriteLine("***********");

              // Decrypt a fixed, previously encrypted value
              string endata = "LpnnvnfA72VnyjboX/OsCPO6FOFXeEnnsKkI7aAEQyVAPfCTfQ43ZYVZVqnADDPMW7VhBXJWyQMAGw2Fh9sS/XLHmO5XW94Yehci6JrJMynePgtIiDysjNA+UlgSTC/MlResNrBm/4MMSPvq0qLwScgpZDynhLsVZk+EQ6G8wgA=";
              string datamw = RSAFromPkcs8.decryptData(endata, privatekey, "UTF-8");
              Console.WriteLine("静态加密后的字符串为:" + endata);
              Console.WriteLine("解密后的字符串内容:" + datamw);

              // Sign
              string signdata = "YB010000001441234567286038508081299";
              Console.WriteLine("具名前的字符串内容:" + signdata);
              string sign = RSAFromPkcs8.sign(signdata, privatekey, "UTF-8");
              Console.WriteLine("具名后的字符串:" + sign);

              Console.ReadLine();
          }
      }
  }
  • StringFormat(pi[j].GetValue(list[i], null).ToString(), type));
     
                        if (j < pi.Length - 1)
                        {
                            Json.Append(",");
                        }
                    }
                    Json.Append("}");
                    if (i < list.Count - 1)
                    {
                        Json.Append(",");
                    }
                }
            }
            Json.Append("]}");
            return Json.ToString();
        }
     
        /// <summary>
        /// List转成json
        /// </summary>
        /// <typeparam name="T"></typeparam>
        /// <param name="list"></param>
        /// <returns></returns>
        public static string ListToJson<T>(IList<T> list)
        {
            object obj = list[0];
            return ListToJson<T>(list, obj.GetType().Name);
        }
     
        /// <summary>
        /// 对象转变为Json字符串
        /// </summary>
        /// <param name="jsonObject">对象</param>
        /// <returns>Json字符串</returns>
        public static string ToJson(object jsonObject)
        {
            try
            {
                StringBuilder jsonString = new StringBuilder();
                jsonString.Append("{");
                PropertyInfo[] propertyInfo = jsonObject.GetType().GetProperties();
                for (int i = 0; i < propertyInfo.Length; i )
                {
                    object objectValue = propertyInfo[i].GetGetMethod().Invoke(jsonObject, null);
                    if (objectValue == null)
                    {
                        continue;
                    }
                    StringBuilder value = new StringBuilder();
                    if (objectValue is DateTime || objectValue is Guid || objectValue is TimeSpan)
                    {
                        value.Append(""" objectValue.ToString() """);
                    }
                    else if (objectValue is string)
                    {
                        value.Append(""" objectValue.ToString() """);
                    }
                    else if (objectValue is IEnumerable)
                    {
                        value.Append(ToJson((IEnumerable)objectValue));
                    }
                    else
                    {
                        value.Append(""" objectValue.ToString() """);
                    }
                    jsonString.Append(""" propertyInfo[i].Name "":" value ","); ;
                }
                return jsonString.ToString().TrimEnd(',') "}";
            }
            catch (Exception ex)
            {
                throw ex;
            }
        }
     
        /// <summary>
        /// 对象群集转变Json
        /// </summary>
        /// <param name="array">会集对象</param>
        /// <returns>Json字符串</returns>
        public static string ToJson(IEnumerable array)
        {
            string jsonString = "[";
            foreach (object item in array)
            {
                jsonString = ToJson(item) ",";
            }
            jsonString.Remove(jsonString.Length - 1, jsonString.Length);
            return jsonString "]";
        }
     
        /// <summary>
        /// 普通集结账和转账换Json
        /// </summary>
        /// <param name="array">集合对象</param>
        /// <returns>Json字符串</returns>
        public static string ToArrayString(IEnumerable array)
        {
            string jsonString = "[";
            foreach (object item in array)
            {
                jsonString = ToJson(item.ToString()) ",";
            }
            jsonString.Remove(jsonString.Length - 1, jsonString.Length);
            return jsonString "]";
        }
     
        /// <summary>
        /// Datatable转换为Json
        /// </summary>
        /// <param name="table">Datatable对象</param>
        /// <returns>Json字符串</returns>
        public static string ToJson(DataTable dt)
        {
            StringBuilder jsonString = new StringBuilder();
            jsonString.Append("[");
            DataRowCollection drc = dt.Rows;
            for (int i = 0; i < drc.Count; i )
            {
                jsonString.Append("{");
                for (int j = 0; j < dt.Columns.Count; j )
                {
                    string strKey = dt.Columns[j].ColumnName;
                    string strValue = drc[i][j].ToString();
                    Type type = dt.Columns[j].DataType;
                    jsonString.Append(""" strKey "":");
                    strValue = StringFormat(strValue, type);
                    if (j < dt.Columns.Count - 1)
                    {
                        jsonString.Append(strValue ",");
                    }
                    else
                    {
                        jsonString.Append(strValue);
                    }
                }
                jsonString.Append("},");
            }
            jsonString.Remove(jsonString.Length - 1, 1);
            jsonString.Append("]");
            return jsonString.ToString();
        }
     
        /// <summary>
        /// DataTable转成Json
        /// </summary>
        /// <param name="jsonName"></param>
        /// <param name="dt"></param>
        /// <returns></returns>
        public static string ToJson(DataTable dt, string jsonName)
        {
            StringBuilder Json = new StringBuilder();
            if (string.IsNullOrEmpty(jsonName))
                jsonName = dt.TableName;
            Json.Append("{"" jsonName "":[");
            if (dt.Rows.Count > 0)
            {
                for (int i = 0; i < dt.Rows.Count; i )
                {
                    Json.Append("{");
                    for (int j = 0; j < dt.Columns.Count; j )
                    {
                        Type type = dt.Rows[i][j].GetType();
                        Json.Append(""" dt.Columns[j].ColumnName.ToString() "":" StringFormat(dt.Rows[i][j].ToString(), type));
                        if (j < dt.Columns.Count - 1)
                        {
                            Json.Append(",");
                        }
                    }
                    Json.Append("}");
                    if (i < dt.Rows.Count - 1)
                    {
                        Json.Append(",");
                    }
                }
            }
            Json.Append("]}");
            return Json.ToString();
        }
     
        /// <summary>
        /// DataReader转换为Json
        /// </summary>
        /// <param name="dataReader">DataReader对象</param>
        /// <returns>Json字符串</returns>
        public static string ToJson(IDataReader dataReader)
        {
            StringBuilder jsonString = new StringBuilder();
            jsonString.Append("[");
     
            while (dataReader.Read())
            {
                jsonString.Append("{");
                for (int i = 0; i < dataReader.FieldCount; i )
                {
                    Type type = dataReader.GetFieldType(i);
                    string strKey = dataReader.GetName(i);
                    string strValue = dataReader[i].ToString();
                    jsonString.Append(""" strKey "":");
                    strValue = StringFormat(strValue, type);
                    if (i < dataReader.FieldCount - 1)
                    {
                        jsonString.Append(strValue ",");
                    }
                    else
                    {
                        jsonString.Append(strValue);
                    }
                }
                jsonString.Append("},");
            }
            dataReader.Close();
            jsonString.Remove(jsonString.Length - 1, 1);
            jsonString.Append("]");
            if (jsonString.Length == 1)
            {
                return "[]";
            }
            return jsonString.ToString();
        }
     
        /// <summary>
        /// DataSet转换为Json
        /// </summary>
        /// <param name="dataSet">DataSet对象</param>
        /// <returns>Json字符串</returns>
        public static string ToJson(DataSet dataSet)
        {
            string jsonString = "{";
            foreach (DataTable table in dataSet.Tables)
            {
                jsonString = """ table.TableName "":" ToJson(table) ",";
            }
            jsonString = jsonString.TrimEnd(',');
            return jsonString "}";
        }
     
        /// <summary>
        /// 过滤特殊字符
        /// </summary>
        /// <param name="s"></param>
        /// <returns></returns>
        private static string String2Json(String s)
        {
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < s.Length; i )
            {
                char c = s.ToCharArray()[i];
                switch (c)
                {
                    case '"':
                        sb.Append("\""); break;
                    case '\':
                        sb.Append("\\"); break;
                    case '/':
                        sb.Append("\/"); break;
                    case 'b':
                        sb.Append("\b"); break;
                    case 'f':
                        sb.Append("\f"); break;
                    case 'n':
                        sb.Append("\n"); break;
                    case 'r':
                        sb.Append("\r"); break;
                    case 't':
                        sb.Append("\t"); break;
                    default:
                        sb.Append(c); break;
                }
            }
            return sb.ToString();
        }
     
        /// <summary>
        /// 格式化字符型、日期型、布尔型
        /// </summary>
        /// <param name="str"></param>
        /// <param name="type"></param>
        /// <returns></returns>
        private static string StringFormat(string str, Type type)
        {
            if (type != typeof(string) && string.IsNullOrEmpty(str))
            {
                str = """ str """;
            }
            else if (type == typeof(string))
            {
                str = String2Json(str);
                str = """ str """;
            }
            else if (type == typeof(DateTime))
            {
                str = """ str.Split(' ')[0] """;
            }
            else if (type == typeof(bool))
            {
                str = str.ToLower();
            }
     
            return str;
        }

 public class DirtyWordOper
    {
        private static Dictionary<string, object> hash = new Dictionary<string, object>();
        private static BitArray firstCharCheck = new BitArray(char.MaxValue);//把脏词的第3个字符记录下来
        private static BitArray allCharCheck = new BitArray(char.马克斯Value);//把每种个脏词的具有字符都记录下来
        private static int maxLength = 0;//
        private static bool onlyOne = true;

 

        #region
        /// <summary>
        /// 重临替换后的字符串 字符串的长度不改变
        /// </summary>
        /// <param name="text"></param>
        /// <returns></returns>
        public string Replace(string text)
        {
            if (onlyOne)
            {
                Init();//开头化数据 推行二回就不会实行了
                onlyOne = false;
            }
            if (!isDirtyword(text))
            {
                return text;
            }
            //获取替换操作表
            List<DetailRepModel> drlist = GetList(text);
            //试行替换操作
            return Replace2(text, drlist);
        }

summary /// List转成json /// /summary /// typeparam name=T/typeparam /// param name=jsonName/param /// param name=list/param /// returns/returns public static string Li...

        /// <summary>
        /// 初叶化用  只举行一次
        /// </summary>
        /// <param name="text"></param>
        private static void Init()
        {
            string[] badwords = DirtyWordData.DirtyKeyword.Split('|');
            foreach (string bw in badwords)
            {
                string[] strarrtemp = bw.Split('&');
                string word = strarrtemp[0];
                word = word.Trim();//去掉数据中的空格及格式 符号
                word = word.Replace("/r", "");
                word = word.Replace("/n", "");
                if (word == "")
                {
                    break;
                }
                if (!hash.ContainsKey(word))
                {
                    hash.Add(word, null);
                    maxLength = Math.Max(maxLength, word.Length);
                    firstCharCheck[word[0]] = true;

                    foreach (char c in word)
                    {
                        allCharCheck[c] = true;
                    }
                }
            }
        }
        /// <summary>
        /// 是还是不是含有 了 脏 词
        /// </summary>
        /// <param name="text"></param>
        /// <returns></returns>
        private static bool isDirtyword(string text)
        {
            int index = 0;
            //int offset = 0;
            while (index < text.Length)
            {
                //假使第多个字符都不合乎
                if (!firstCharCheck[text[index]])
                {// 直接找到与脏词第壹字符一样甘休
                    while (index < text.Length - 1 && !firstCharCheck[text[ index]]) ;
                }
                for (int j = 1; j <= Math.Min(maxLength, text.Length

  • index); j )
                    {
                        if (!allCharCheck[text[index j - 1]])
                        {
                            break;
                        }
                        string sub = text.Substring(index, j);
                        //判断脏字字典中是否包蕴了脏词
                        if (hash.ContainsKey(sub))
                        {
                            return true;//是
                        }
                    }
                    index ;
                }
                return false;//否
            }

        /// <summary>
        /// 重回操作列表
        /// </summary>
        /// <param name="text"></param>
        /// <returns></returns>
        private static List<DetailRepModel> GetList(string text)
        {
            List<DetailRepModel> DetailList = new List<DetailRepModel>();
            int index = 0;
            while (index < text.Length)
            {
                if (!firstCharCheck[text[index]])
                {
                    while (index < text.Length - 1 && !firstCharCheck[text[ index]]) ;
                }
                DetailRepModel tempDetail = null;
                for (int j = 1; j <= Math.Min(maxLength, text.Length

  • index); j )
                    {
                        if (!allCharCheck[text[index j - 1]])
                        {
                            if (tempDetail != null)
                            {//优先先字符串替换
                                index = index tempDetail.number - 一;//索引要赶回上一位,所以要减一
                                DetailList.Add(tempDetail);
                            }
                            break;
                        }
                        string sub = text.Substring(index, j);
                        if (hash.ContainsKey(sub))
                        {
                            tempDetail = new DetailRepModel();
                            tempDetail.index = index;
                            tempDetail.number = sub.Length;
                            tempDetail.content = sub;
                            //break;//进行下二回 否则要出现, abc 在那之中ab 与a都至关首要字要生成三个操作                      
                        }
                        if (tempDetail != null)
                        {
                            if (j 1 > Math.Min(maxLength, text.Length - index))
                            {//优先先字符串替换
                                DetailList.Add(tempDetail);
                                index = index tempDetail.number - 1;//索引要赶回上一个人,所以要减1
                            }
                        }
                    }
                    index ;
                }
                return DetailList;
            }
            /// <summary>
            /// 传入 字串和 脏字替换操作表,
            /// </summary>
            /// <param name="text"></param>
            /// <param name="drlist"></param>
            /// <returns> 输出替换后的字串</returns>
            private static string Replace2(string text, List<DetailRepModel> drlist)
            {

            if (drlist == null || drlist.Count == 0 || text == "")
            {
                return text;
            }
            foreach (DetailRepModel dr in drlist)
            {
                if (dr != null)
                {
                    string strtemp = text.Substring(dr.index, dr.number);
                    object ob = DirtyWordData.DirtyHT[(object)strtemp];
                    if (ob == null)
                    {
                        //记录错误
                        break;
                    }
                    // 那样替换 有错误 ,
                    text = text.Substring(0, dr.index) ob.ToString() text.Substring(dr.index dr.number);
                    //text = text.Replace(strtemp, ob.ToString());
                }
            }
            return text;
        }
        #endregion
    }

 

效果还不错,不过我们老大给我说了个方法更NB,说比这种要快50倍;只是写起来有点麻烦

 

 /// <summary>
 /// Contract for a text replacer. ReplaceDW generates and compiles a class named
 /// ReplaceDW_ that implements this interface at run time.
 /// </summary>
 public interface IReplaceDW
    {
        /// <summary>
        /// Returns the replaced form of the given text.
        /// </summary>
        /// <param name="s">Text to process.</param>
        /// <returns>The processed text.</returns>
        string Replace(string s);
    }
    public class ReplaceDW
    {
        public static void AddToWords(DirtyChar parent, string s, string t)
        {
            DirtyChar dc = parent.Children.Find(o => o.Orienginal == s[0]);
            if (dc == null)
            {
                dc = new DirtyChar() { Orienginal = s[0], Children = new List<DirtyChar>(), Target = "" };
                parent.Children.Add(dc);
            }
            if (s.Length > 1)
            {//
                AddToWords(dc, s.Substring(1), t);
            }
            else
            {
                dc.Target = t;
            }
        }

        public static string BuildChildren(DirtyChar dc, int deepLevel)
        {
            StringBuilder sb = new StringBuilder();
            string spaces = new string(' ', deepLevel 4);

            if (dc.Children.Count > 0)
            {
                sb.Append(@"
" spaces @"if (i 1 == len){");
                sb.Append(@"
" spaces @"    sb.Append(""" dc.Target @""");
                ");
                sb.Append(@"
" spaces @"    i ;
" spaces @"    break;}");
                sb.Append(@"
" spaces @" switch (s[i " deepLevel.ToString() @"])
" spaces @" {
");
                foreach (DirtyChar c in dc.Children)
                {
                    sb.Append(@"
" spaces @"  case '" c.Orienginal @"':
");
                    sb.Append(BuildChildren(c, deepLevel 1));
                    sb.Append(@"
" spaces @"   break;");
                }
              
              
                sb.Append(@"
" spaces @" default:
" spaces @"    sb.Append(""" dc.Target @""");
" spaces @"    i ;
" spaces @"    break;
" spaces @" }
");
            }
            else
            {
                sb.Append(@"
" spaces @"  sb.Append(""" dc.Target @""");
");
                if (deepLevel == 1)
                {
                    sb.Append(@"
" spaces @"  i ;
");
                }
                else
                {
                    sb.Append(@"
" spaces @"  i = " (deepLevel).ToString() @";
");
                }
            }
            return sb.ToString();
        }

        private IReplaceDW _r = null;
        private static bool isfirst = true;
        public string Replace(string s)
        {
            return _r.Replace(s);
        }
        private static List<KeyValuePair<string, string>> tmp = new List<KeyValuePair<string, string>>();
        public ReplaceDW()
        {
            if (isfirst)
            {              
                List<KeyValuePair<string, string>> dict = new List<KeyValuePair<string, string>>();
                foreach (DictionaryEntry d in KeyWord.DirtyWordData.DirtyHT)
                {
                    dict.Add(new KeyValuePair<string, string>(d.Key.ToString(), d.Value.ToString()));
                }
                // 整理进 list
                //List<KeyValuePair<string, string>> tmp = new List<KeyValuePair<string, string>>();
                foreach (KeyValuePair<string, string> kv in dict)
                {
                    tmp.Add(kv);
                }
                // 倒排
                tmp.Sort((a, b) => { return b.Key.CompareTo(a.Key); });
                isfirst = false;
            }
            var compiler = new CSharpCodeProvider();
            var options = new CompilerParameters();

            // set compile options  
            options.CompilerOptions = "/o";
            options.GenerateExecutable = false;
            options.GenerateInMemory = true;
            options.ReferencedAssemblies.Add("System.dll");
            options.ReferencedAssemblies.Add(this.GetType().Assembly.Location);

            // set the source code to compile  
            DirtyChar words = new DirtyChar() { Children = new List<DirtyChar>() };
            //DirtyChar words2 = new DirtyChar();
            //words2.Children = new List<DirtyChar>();
            foreach (KeyValuePair<string, string> kv in tmp)
            {//创设字典表
                AddToWords(words, kv.Key, kv.Value);
            }

            StringBuilder sb = new StringBuilder();
            sb.Append(@"
using System;  
namespace KeyWord
{
public class ReplaceDW_ : IReplaceDW
{  
    public string Replace( string s )
 {  
  int len = s.Length, i = 0;
        System.Text.StringBuilder sb = new System.Text.StringBuilder(len);
");
            sb.Append(@"
  while (i < len)
  {
   switch (s[i])
   {
");
            foreach (DirtyChar c in words.Children)
            {
                sb.Append(@"
    case '" c.Orienginal @"':
");
                sb.Append(BuildChildren(c, 1));
                sb.Append(@"
     break;");
            }
            sb.Append(@"
    default:
     sb.Append(s[i ]);
     break;
   }
  }
");
            sb.Append(@"
  return sb.ToString();

    }  
}
}");
            // compile the code, on-the-fly  
            var result = compiler.CompileAssemblyFromSource(options, sb.ToString());
           
            foreach (var error in result.Errors)
            {
                // print errors  
                ;
            }

            // if compilation sucessed  
            if ((!result.Errors.HasErrors) && (result.CompiledAssembly != null))
            {
                var type = result.CompiledAssembly.GetType("KeyWord.ReplaceDW_");
                try
                {
                    if (type != null)
                    {
                        this._r = Activator.CreateInstance(type) as IReplaceDW;
                    }
                    this.Replace("x"); //预热
                    this.Replace("x"); //预热
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex);
                }
            }
        }
    }

 

本文由澳门新浦京娱乐场网站发布于www.146.net,转载请注明出处:澳门新浦京娱乐场网站:脏字过滤算法,爬虫学