请协助和审查以下任务。使用Bing Map服务器可以找到许多地理数据。根据结果,收集数据、解析bing响应并将其存储在两个文件中的一个是必要的。我需要使它并行,因为它可能会有很多数据。我不喜欢我如何平行的代码,所以我将特别感谢对改进意见。下面是代码
using Newtonsoft.Json;
using System;
using System.IO;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Collections;
using System.Diagnostics;
namespace BingGeoLocations
{
class Program
{
private static string _folder = @"D:\TempFolder\";
private static object _consoleLock = new object();
private static object _successfileLock = new object();
private static object _failurefileLock = new object();
static void Main(string[] args)
{
var successGeoLocationCount = 0;
var failedGeoLocationCount = 0;
var allLocations = GetDocumentLocations().ToList();
var allLocationsCount = allLocations.Count();
var timer = new Stopwatch();
timer.Start();
var reportLocker = new object();
Parallel.For(0, allLocations.Count,
(i) =>
{
try
{
var bingLocations = GetBingLocations(allLocations[i]);
Interlocked.Increment(ref successGeoLocationCount);
StoreSuccessResults(allLocations[i], bingLocations);
}
catch (Exception ex)
{
Interlocked.Increment(ref failedGeoLocationCount);
StoreFailure(allLocations[i], ex.Message);
}
lock (reportLocker)
{
Console.SetCursorPosition(0, Console.CursorTop);
Console.Write($"Processed {successGeoLocationCount + failedGeoLocationCount} locations out of { allLocationsCount}. Successful - {successGeoLocationCount}. Failed - {failedGeoLocationCount}");
}
});
timer.Stop();
Console.WriteLine();
Console.WriteLine($"Total execution time - {timer.ElapsedMilliseconds}.");
Console.ReadLine();
}
private static void StoreFailure(string docLocation, string failureDescription)
{
var failureFileName = "geography_failed.txt";
if (docLocation != null)
{
var newInfo = new StringBuilder();
newInfo.AppendLine(String.Join(";", new string[] { docLocation, failureDescription }));
lock (_failurefileLock)
{
using (StreamWriter writer = File.AppendText(_folder + failureFileName))
{
writer.Write(newInfo.ToString());
}
}
}
}
private static void StoreSuccessResults(string docLocation, IEnumerable bingLocations)
{
var successFileName = "geography_success.txt";
if (docLocation != null && bingLocations != null && bingLocations.Count() > 0)
{
var newInfo = new StringBuilder();
foreach (BingLocation bingLoc in bingLocations)
{
newInfo.AppendLine(String.Join(";", new string[] {
docLocation, bingLoc.CountryRegion, bingLoc.AdminDistrict, bingLoc.AdminDistrict2 }));
}
lock (_successfileLock)
{
using (StreamWriter writer = File.AppendText(_folder + successFileName))
{
writer.Write(newInfo.ToString());
}
}
}
}
static IEnumerable GetDocumentLocations()
{
var fileName = "geography.txt";
return File.ReadAllLines(fileName).Where(s => !String.IsNullOrWhiteSpace(s));
}
static IEnumerable GetBingLocations(string docLocation)
{
var result = new List();
var bingKey = "MySecretBingKey";
using (HttpClient client = new HttpClient())
{
var response = client.GetStringAsync("http://dev.virtualearth.net/REST/v1/Locations?q=" + Uri.EscapeDataString(docLocation) + "&c=en-US&maxResults=10&key=" + bingKey).Result;
dynamic responseObject = JsonConvert.DeserializeObject(response);
var statusCode = responseObject.statusCode;
if (statusCode != "200")
{
throw new Exception("Status code is not 200.");
}
var highConfidenceResources = ((IEnumerable)responseObject.resourceSets[0].resources).Cast().Where(p => p.confidence.ToString().ToUpper() == "HIGH").ToList();
if (highConfidenceResources.Count == 0)
{
throw new Exception("There are not High Confident results.");
}
foreach (dynamic res in highConfidenceResources)
{
var bingLocation = new BingLocation();
bingLocation.AdminDistrict = res.address.adminDistrict;
bingLocation.CountryRegion = res.address.countryRegion;
if (res.address.adminDistrict2 != null)
{
bingLocation.AdminDistrict2 = res.address.adminDistrict2;
}
else
{
bingLocation.AdminDistrict2 = res.address.locality;
}
result.Add(bingLocation);
}
}
return result;
}
}
}
public class BingLocation
{
public string CountryRegion;
public string AdminDistrict;
public string AdminDistrict2;
}发布于 2018-05-08 14:11:59
我建议使用必应映射JSON数据契约类来反序列化响应:
private static BingMapsRESTService.Common.JSON.Response DeserializeResponse(string json)
{
using (var tempStream = new MemoryStream(Encoding.UTF8.GetBytes(json)))
{
var serializer = new DataContractJsonSerializer(typeof(BingMapsRESTService.Common.JSON.Response));
var response = (BingMapsRESTService.Common.JSON.Response)serializer.ReadObject(tempStream);
return response;
}
}我还建议使用TPL,因为它可以通过与一些简单的构建块并行运行步骤来增加并发性。数据流确实包括一些样板设置代码,但它很容易实现和重新组织块。
下面是您的程序的数据流实现:
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Runtime.Serialization.Json;
using System.Text;
using System.Threading.Tasks;
using System.Threading.Tasks.Dataflow;
namespace BingGeoLocations
{
class Program
{
public static async Task Main(string[] args)
{
var locationsFileName = "geography.txt";
var outputFolder = @"D:\TempFolder\";
var successFileName = Path.Combine(outputFolder, "geography_success.txt");
var failureFileName = Path.Combine(outputFolder, "geography_failed.txt");
var bingLocationQueryOptions = new ExecutionDataflowBlockOptions
{
MaxDegreeOfParallelism = DataflowBlockOptions.Unbounded,
BoundedCapacity = 1000, // Limits the memory usage
};
var queryBingLocationBlock = new TransformBlock(
async location => await GetBingLocationsAsync(location),
bingLocationQueryOptions
);
var processResultOptions = new ExecutionDataflowBlockOptions
{
MaxDegreeOfParallelism = 1, // Serialize so we can write to file/console
BoundedCapacity = 500, // Limits the memory usage
};
var processSucceededQueryBlock = new TransformBlock(
result =>
{
File.AppendAllText(successFileName, result.ToResultString());
return result;
},
processResultOptions
);
var processFailedQueryBlock = new TransformBlock(
result =>
{
File.AppendAllText(failureFileName, result.ToResultString());
return result;
},
processResultOptions
);
var propagateCompletionOptions = new DataflowLinkOptions
{
PropagateCompletion = true,
};
queryBingLocationBlock.LinkTo(processSucceededQueryBlock, propagateCompletionOptions, query => query.Succeeded);
queryBingLocationBlock.LinkTo(processFailedQueryBlock, propagateCompletionOptions, query => !query.Succeeded);
var locations = File.ReadLines(locationsFileName)
.Where(l => !String.IsNullOrWhiteSpace(l))
.ToList();
var totalQueries = locations.Count;
var failedQueriesCount = 0;
var succeededQueriesCount = 0;
var outputQueryProgressBlock = new ActionBlock(
result =>
{
if (result.Succeeded)
{
succeededQueriesCount += 1;
}
else
{
failedQueriesCount += 1;
}
Console.SetCursorPosition(Console.CursorLeft, Console.CursorTop);
Console.Write($"Processed {succeededQueriesCount + failedQueriesCount} locations out of { totalQueries}. Successful - {succeededQueriesCount}. Failed - {failedQueriesCount}.");
},
processResultOptions
);
processSucceededQueryBlock.LinkTo(outputQueryProgressBlock);
processFailedQueryBlock.LinkTo(outputQueryProgressBlock);
// When completion goes from multiple block to a single block
// we can't use link propagation and have to do it ourself.
Task.WhenAll(processSucceededQueryBlock.Completion, processFailedQueryBlock.Completion).ContinueWith(_ => outputQueryProgressBlock.Complete());
var timer = Stopwatch.StartNew();
foreach (var location in locations)
{
await queryBingLocationBlock.SendAsync(location);
}
queryBingLocationBlock.Complete();
await outputQueryProgressBlock.Completion;
Console.WriteLine();
Console.WriteLine($"Total execution time - {timer.ElapsedMilliseconds}.");
Console.ReadLine();
}
internal sealed class BingLocation
{
public string CountryRegion { get; set; }
public string AdminDistrict { get; set; }
public string AdminDistrict2 { get; set; }
}
internal sealed class BingLocationException : Exception
{
public BingLocationException(string message)
: base(message)
{ }
}
internal abstract class BingLocationQuery
{
public string Location { get; }
public bool Succeeded { get; }
protected BingLocationQuery(string location, bool succeeded)
{
if (string.IsNullOrWhiteSpace(location))
{
throw new ArgumentException(nameof(location) + " cannot be null or white space.", nameof(location));
}
Location = location;
Succeeded = succeeded;
}
public abstract string ToResultString();
}
internal sealed class BingLocationQuerySuccess : BingLocationQuery
{
public List BingLocations { get; }
public BingLocationQuerySuccess(string location, List bingLocations)
: base(location, succeeded: true)
{
BingLocations = bingLocations;
}
public override string ToResultString()
{
var resultStringBuilder = new StringBuilder();
foreach (var bingLoc in BingLocations)
{
resultStringBuilder
.Append(Location)
.Append(';')
.Append(bingLoc.CountryRegion)
.Append(';')
.Append(bingLoc.AdminDistrict)
.Append(';')
.Append(bingLoc.AdminDistrict2)
.AppendLine();
}
return resultStringBuilder.ToString();
}
}
internal sealed class BingLocationQueryFailure : BingLocationQuery
{
public string FailureDescription { get; }
public BingLocationQueryFailure(string location, string failureDescription)
: base(location, succeeded: false)
{
FailureDescription = failureDescription;
}
public override string ToResultString()
{
var resultStringBuilder = new StringBuilder();
resultStringBuilder
.Append(Location)
.Append(';')
.Append(FailureDescription);
return resultStringBuilder.ToString();
}
}
private static async Task GetBingLocationsAsync(string docLocation)
{
const string bingKey = "MySecretBingKey";
using (var client = new HttpClient())
{
var jsonResponse = await client.GetStringAsync("https://dev.virtualearth.net/REST/v1/Locations?q=" + Uri.EscapeDataString(docLocation) + "&c=en-US&maxResults=10&key=" + bingKey);
var response = DeserializeResponse(jsonResponse);
var statusCode = response.StatusCode;
if (statusCode != 200)
{
return new BingLocationQueryFailure(docLocation, "Status code is not 200.");
}
var highConfidenceResources =
response.ResourceSets[0]
.Resources
.OfType()
.Where(p => string.Equals(p.Confidence.ToString(), "HIGH", StringComparison.OrdinalIgnoreCase))
.ToList();
if (highConfidenceResources.Count == 0)
{
return new BingLocationQueryFailure(docLocation, "There are not High Confident results.");
}
var bingLocations = highConfidenceResources
.Select(res => new BingLocation
{
AdminDistrict = res.Address.AdminDistrict,
CountryRegion = res.Address.CountryRegion,
AdminDistrict2 = res.Address.AdminDistrict2 ?? res.Address.Locality,
})
.ToList();
return new BingLocationQuerySuccess(docLocation, bingLocations);
}
}
private static BingMapsRESTService.Common.JSON.Response DeserializeResponse(string json)
{
using (var tempStream = new MemoryStream(Encoding.UTF8.GetBytes(json)))
{
var serializer = new DataContractJsonSerializer(typeof(BingMapsRESTService.Common.JSON.Response));
var response = (BingMapsRESTService.Common.JSON.Response)serializer.ReadObject(tempStream);
return response;
}
}
}
}发布于 2018-05-08 08:34:25
您忘了删除_consoleLock。不再用了。我想它已经被范围较小的reportLocker所取代了。当然是件好事。但这个名字不符合其他锁具的约定。
您可以使用Parallel.For在allLocations上迭代。你可以用Parallel.ForEach代替。
您可以使用泛型Exception类型。这样就能捕捉到试图捕获块中的所有可能的异常。最好创建自己的BingLocationRequestException。因此,要确保任何其他类型的异常都会出现,而不是计算failedGeoLocationCount。
successGeoLocationCount与failedGeoLocationCount的命名约定不匹配。successfulGeoLocationCount会更好。
您可以使用Stopwatch.StartNew()静态函数创建一个新的秒表并立即启动它。
StoreFailure(...)和StoreSuccessResult(...)应该会提前失败。不要检查docLocation是否为null,当为true时继续检查,而是检查它是否为null并立即返回。从而减少了压痕。
failureFileName和successFileName以及'fileName‘可以是静态的。
考虑使用File.AppendAllText立即将字符串附加到其路径所引用的文件中。
GetDocumentLocations()和GetBingLocations(...)缺少它们的访问修饰符。
您对var的使用有一点不一致。例如,在StoreSuccessResults(...) foreach循环中,可以指定元素的确切类型。
类BingLocation不在BingGeoLocations命名空间中。
不过,我不会对dynamic的使用发表评论。我假设您在HTTP请求中使用了一个片段。也许在这里使用dynamic实际上是正确的方法。但以我的经验,最好确保它真的有必要使用它。
https://codereview.stackexchange.com/questions/193861
复制相似问题